Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * numa.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  * numa: Simulate NUMA-sensitive workloads and measure their NUMA performance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8) #include <inttypes.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9) /* For the CLR_() macros */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10) #include <pthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #include <subcmd/parse-options.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) #include "../util/cloexec.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) #include "bench.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) #include <errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include <sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) #include <stdio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #include <assert.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <malloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #include <signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <stdlib.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) #include <string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #include <unistd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) #include <sys/mman.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) #include <sys/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) #include <sys/resource.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) #include <sys/wait.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) #include <sys/prctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) #include <sys/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33) #include <linux/time64.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34) #include <linux/numa.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35) #include <linux/zalloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37) #include <numa.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38) #include <numaif.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39) 
/* Supply RUSAGE_THREAD when the headers included above did not define it: */
#if !defined(RUSAGE_THREAD)
# define RUSAGE_THREAD 1
#endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) 
/*
 * Regular printout to the terminal, suppressed if -q is specified.
 * Output is produced only when the global state 'g' has been set up and
 * g->p.show_details is not negative:
 */
#define tprintf(x...)					\
do {							\
	if (g && g->p.show_details >= 0)		\
		printf(x);				\
} while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) 
/*
 * Debug printf: output is produced only when the global state 'g' has been
 * set up and g->p.show_details is at least 1. Any earlier macro definition
 * of dprintf is dropped before this one is installed.
 */
#undef dprintf
#define dprintf(x...)					\
do {							\
	if (g && g->p.show_details >= 1)		\
		printf(x);				\
} while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) 
/*
 * Per-task bookkeeping record. struct global_info reaches these records
 * through its 'threads' pointer.
 *
 * NOTE(review): the code that fills in and reads these fields is outside this
 * excerpt. Going by the names only, the fields appear to hold the task's CPU
 * and node binding, its position in the process/thread hierarchy, and the
 * per-task results (loops, run/system/user time in ns, speed in GB/s) --
 * confirm against the worker code before relying on that.
 */
struct thread_data {
	int			curr_cpu;
	cpu_set_t		bind_cpumask;
	int			bind_node;
	u8			*process_data;
	int			process_nr;
	int			thread_nr;
	int			task_nr;
	unsigned int		loops_done;
	u64			val;
	u64			runtime_ns;
	u64			system_time_ns;
	u64			user_time_ns;
	double			speed_gbs;
	pthread_mutex_t		*process_lock;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) 
/*
 * Parameters set by options.
 *
 * The option table in this file stores directly into a file-scope instance of
 * this struct (p0); struct global_info embeds another instance as 'p'.
 * Fields that have no entry in the option table (mb_*, bytes_*, nr_tasks,
 * nr_cpus, nr_nodes, cpu_list_str, node_list_str) are filled in by code that
 * is not part of this excerpt.
 */

struct params {
	/* Startup synchronization: */
	bool			serialize_startup;

	/* Task hierarchy: */
	int			nr_proc;
	int			nr_threads;

	/* Working set sizes, as the raw -G/-P/-L/-T option strings: */
	const char		*mb_global_str;
	const char		*mb_proc_str;
	const char		*mb_proc_locked_str;
	const char		*mb_thread_str;

	/* presumably the numeric (MB) values of the strings above -- TODO confirm */
	double			mb_global;
	double			mb_proc;
	double			mb_proc_locked;
	double			mb_thread;

	/* Access patterns to the working set: */
	bool			data_reads;
	bool			data_writes;
	bool			data_backwards;
	bool			data_zero_memset;
	bool			data_rand_walk;
	u32			nr_loops;
	u32			nr_secs;
	u32			sleep_usecs;

	/* Working set initialization: */
	bool			init_zero;
	bool			init_random;
	bool			init_cpu0;

	/* Misc options: */
	int			show_details;	/* tprintf() needs >= 0, dprintf() needs >= 1 */
	int			run_all;
	int			thp;		/* > 0: MADV_HUGEPAGE, < 0: MADV_NOHUGEPAGE */

	long			bytes_global;
	long			bytes_process;
	long			bytes_process_locked;
	long			bytes_thread;

	int			nr_tasks;
	bool			show_quiet;

	bool			show_convergence;
	bool			measure_convergence;

	int			perturb_secs;
	int			nr_cpus;
	int			nr_nodes;

	/* Affinity options -C and -M: */
	char			*cpu_list_str;
	char			*node_list_str;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 
/*
 * Global, read-writable area, accessible to all processes and threads.
 *
 * The file-scope pointer 'g' refers to the single instance. The mutex/cond
 * pairs and the counters next to them are grouped by phase (startup, start of
 * work, stop of work); how they are used is defined by code outside this
 * excerpt.
 */

struct global_info {
	u8			*data;

	pthread_mutex_t		startup_mutex;
	pthread_cond_t		startup_cond;
	int			nr_tasks_started;

	pthread_mutex_t		start_work_mutex;
	pthread_cond_t		start_work_cond;
	int			nr_tasks_working;
	bool			start_work;

	pthread_mutex_t		stop_work_mutex;
	u64			bytes_done;

	struct thread_data	*threads;

	/* Convergence latency measurement: */
	bool			all_converged;
	bool			stop_work;

	/* Set once a THP warning has been printed, so it is shown only once: */
	int			print_once;

	/* The parameters this run uses: */
	struct params		p;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) static struct global_info	*g = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) struct params p0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 
/*
 * Command-line option table.
 *
 * Every plain option stores straight into the file-scope 'p0' parameter
 * block. -C/--cpus and -M/--memnodes carry no value pointer (NULL) and are
 * handled by the parse_cpus_opt() / parse_nodes_opt() callbacks instead.
 * The table is terminated by OPT_END().
 */
static const struct option options[] = {
	OPT_INTEGER('p', "nr_proc"	, &p0.nr_proc,		"number of processes"),
	OPT_INTEGER('t', "nr_threads"	, &p0.nr_threads,	"number of threads per process"),

	OPT_STRING('G', "mb_global"	, &p0.mb_global_str,	"MB", "global  memory (MBs)"),
	OPT_STRING('P', "mb_proc"	, &p0.mb_proc_str,	"MB", "process memory (MBs)"),
	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread  memory (MBs)"),

	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run (default: unlimited)"),
	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run (default: 5 secs)"),
	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),

	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via reads (can be mixed with -W)"),
	OPT_BOOLEAN('W', "data_writes"	, &p0.data_writes,	"access the data via writes (can be mixed with -R)"),
	OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,	"access the data backwards as well"),
	OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
	OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk,	"access the data with random (32bit LFSR) walk"),


	OPT_BOOLEAN('z', "init_zero"	, &p0.init_zero,	"bzero the initial allocations"),
	OPT_BOOLEAN('I', "init_random"	, &p0.init_random,	"randomize the contents of the initial allocations"),
	OPT_BOOLEAN('0', "init_cpu0"	, &p0.init_cpu0,	"do the initial allocations on CPU#0"),
	OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs,	"perturb thread 0/0 every X secs, to test convergence stability"),

	OPT_INCR   ('d', "show_details"	, &p0.show_details,	"Show details"),
	OPT_INCR   ('a', "all"		, &p0.run_all,		"Run all tests in the suite"),
	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, "
		    "convergence is reached when each process (all its threads) is running on a single NUMA node."),
	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"quiet mode"),
	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),

	/* Special option string parsing callbacks: */
        OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
			"bind the first N tasks to these specific cpus (the rest is unbound)",
			parse_cpus_opt),
        OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
			"bind the first N tasks to these specific memory nodes (the rest is unbound)",
			parse_nodes_opt),
	OPT_END()
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 
/* NULL-terminated usage text for "perf bench numa": */
static const char * const bench_numa_usage[] = {
	[0] = "perf bench numa <options>",
	[1] = NULL,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 
/* NULL-terminated usage text for "perf bench numa mem": */
static const char * const numa_usage[] = {
	[0] = "perf bench numa mem [<options>]",
	[1] = NULL,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224)  * To get number of numa nodes present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) static int nr_numa_nodes(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 	int i, nr_nodes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 	for (i = 0; i < g->p.nr_nodes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 		if (numa_bitmask_isbitset(numa_nodes_ptr, i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 			nr_nodes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 	return nr_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239)  * To check if given numa node is present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) static int is_node_present(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	return numa_bitmask_isbitset(numa_nodes_ptr, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247)  * To check given numa node has cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) static bool node_has_cpus(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	struct bitmask *cpumask = numa_allocate_cpumask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 	bool ret = false; /* fall back to nocpus */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 	int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 	BUG_ON(!cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 	if (!numa_node_to_cpus(node, cpumask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 		for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 			if (numa_bitmask_isbitset(cpumask, cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 				ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 	numa_free_cpumask(cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) static cpu_set_t bind_to_cpu(int target_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 	cpu_set_t orig_mask, mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 	BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 	CPU_ZERO(&mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 	if (target_cpu == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 		int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 			CPU_SET(cpu, &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 		BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 		CPU_SET(target_cpu, &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 	ret = sched_setaffinity(0, sizeof(mask), &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 	BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 	return orig_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) static cpu_set_t bind_to_node(int target_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	cpu_set_t orig_mask, mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 	ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 	BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 	CPU_ZERO(&mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 	if (target_node == NUMA_NO_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 			CPU_SET(cpu, &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 		struct bitmask *cpumask = numa_allocate_cpumask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 		BUG_ON(!cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 		if (!numa_node_to_cpus(target_node, cpumask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 			for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 				if (numa_bitmask_isbitset(cpumask, cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 					CPU_SET(cpu, &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 		numa_free_cpumask(cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 	ret = sched_setaffinity(0, sizeof(mask), &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 	BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 	return orig_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 
/*
 * Apply @mask as the calling task's CPU affinity.
 * A failing sched_setaffinity() is fatal (BUG_ON).
 */
static void bind_to_cpumask(cpu_set_t mask)
{
	int ret = sched_setaffinity(0, sizeof(cpu_set_t), &mask);

	BUG_ON(ret);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) static void mempol_restore(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 	BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) static void bind_to_memnode(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 	unsigned long nodemask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 	if (node == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 	nodemask = 1L << node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 	ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 	dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 	BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 
/* Alignment unit and extra padding used by alloc_data() (2MB): */
#define HPSIZE (2*1024*1024)

/*
 * Format a name printf-style into a 20-byte scratch buffer and set it as the
 * calling task's name via prctl(PR_SET_NAME). Output that does not fit the
 * buffer is truncated by snprintf().
 */
#define set_taskname(fmt...)				\
do {							\
	char name[20];					\
							\
	snprintf(name, sizeof(name), fmt);		\
	prctl(PR_SET_NAME, name);			\
} while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) static u8 *alloc_data(ssize_t bytes0, int map_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 		      int init_zero, int init_cpu0, int thp, int init_random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 	cpu_set_t orig_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 	ssize_t bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 	u8 *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	if (!bytes0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	/* Allocate and initialize all memory on CPU#0: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 	if (init_cpu0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 		int node = numa_node_of_cpu(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 		orig_mask = bind_to_node(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 		bind_to_memnode(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	bytes = bytes0 + HPSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 	BUG_ON(buf == (void *)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 	if (map_flags == MAP_PRIVATE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 		if (thp > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 			ret = madvise(buf, bytes, MADV_HUGEPAGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 			if (ret && !g->print_once) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 				g->print_once = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 				printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 		if (thp < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 			ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 			if (ret && !g->print_once) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 				g->print_once = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 				printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 	if (init_zero) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 		bzero(buf, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 		/* Initialize random contents, different in each word: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 		if (init_random) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 			u64 *wbuf = (void *)buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 			long off = rand();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 			long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 			for (i = 0; i < bytes/8; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 				wbuf[i] = i + off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	/* Align to 2MB boundary: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 	buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 	/* Restore affinity: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 	if (init_cpu0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 		bind_to_cpumask(orig_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 		mempol_restore();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	return buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 
/*
 * Unmap @bytes bytes starting at @data.
 * A NULL @data is ignored; a failing munmap() is fatal (BUG_ON).
 */
static void free_data(void *data, ssize_t bytes)
{
	if (data) {
		int ret = munmap(data, bytes);

		BUG_ON(ret);
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451)  * Create a shared memory buffer that can be shared between processes, zeroed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) static void * zalloc_shared_data(ssize_t bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 	return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0,  g->p.thp, g->p.init_random);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459)  * Create a shared memory buffer that can be shared between processes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) static void * setup_shared_data(ssize_t bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0,  g->p.thp, g->p.init_random);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467)  * Allocate process-local memory - this will either be shared between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468)  * threads of this process, or only be accessed by this thread:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) static void * setup_private_data(ssize_t bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 	return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0,  g->p.thp, g->p.init_random);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 
/*
 * Initialize '*mutex' as a process-shared (global) mutex, i.e. with the
 * PTHREAD_PROCESS_SHARED attribute set.
 *
 * The return values of the pthread calls are not checked.
 */
static void init_global_mutex(pthread_mutex_t *mutex)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
	pthread_mutex_init(mutex, &attr);

	/* The attribute object is not needed once the mutex is initialized: */
	pthread_mutexattr_destroy(&attr);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 
/*
 * Initialize '*cond' as a process-shared (global) condition variable,
 * i.e. with the PTHREAD_PROCESS_SHARED attribute set.
 *
 * The return values of the pthread calls are not checked.
 */
static void init_global_cond(pthread_cond_t *cond)
{
	pthread_condattr_t attr;

	pthread_condattr_init(&attr);
	pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
	pthread_cond_init(cond, &attr);

	/* The attribute object is not needed once the condvar is initialized: */
	pthread_condattr_destroy(&attr);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) static int parse_cpu_list(const char *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 	p0.cpu_list_str = strdup(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 	dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) static int parse_setup_cpu_list(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 	struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 	char *str0, *str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 	int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 	if (!g->p.cpu_list_str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 	str0 = str = strdup(g->p.cpu_list_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	t = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 	BUG_ON(!str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 	tprintf("# binding tasks to CPUs:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 	tprintf("#  ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 		int bind_cpu, bind_cpu_0, bind_cpu_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 		char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 		int bind_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 		int step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 		int mul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 		tok = strsep(&str, ",");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 		if (!tok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 		tok_end = strstr(tok, "-");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 		if (!tok_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 			/* Single CPU specified: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 			bind_cpu_0 = bind_cpu_1 = atol(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 			/* CPU range specified (for example: "5-11"): */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 			bind_cpu_0 = atol(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 			bind_cpu_1 = atol(tok_end + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 		step = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 		tok_step = strstr(tok, "#");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 		if (tok_step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 			step = atol(tok_step + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 			BUG_ON(step <= 0 || step >= g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 		 * Mask length.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 		 * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 		 * where the _4 means the next 4 CPUs are allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 		bind_len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 		tok_len = strstr(tok, "_");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 		if (tok_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 			bind_len = atol(tok_len + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 			BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 		mul = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 		tok_mul = strstr(tok, "x");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 		if (tok_mul) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 			mul = atol(tok_mul + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 			BUG_ON(mul <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 		dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 		if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 			printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 			return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 		BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 		BUG_ON(bind_cpu_0 > bind_cpu_1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 		for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 			int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 			for (i = 0; i < mul; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 				int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 				if (t >= g->p.nr_tasks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 					printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 					goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 				td = g->threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 				if (t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 					tprintf(",");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 				if (bind_len > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 					tprintf("%2d/%d", bind_cpu, bind_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 				} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 					tprintf("%2d", bind_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 				CPU_ZERO(&td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 				for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 					BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 					CPU_SET(cpu, &td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 				t++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 	tprintf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	if (t < g->p.nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 		printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 	free(str0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) static int parse_cpus_opt(const struct option *opt __maybe_unused,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 			  const char *arg, int unset __maybe_unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 	if (!arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 	return parse_cpu_list(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) static int parse_node_list(const char *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	p0.node_list_str = strdup(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	dprintf("got NODE list: {%s}\n", p0.node_list_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) static int parse_setup_node_list(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 	char *str0, *str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 	int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 	if (!g->p.node_list_str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 	str0 = str = strdup(g->p.node_list_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 	t = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	BUG_ON(!str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 	tprintf("# binding tasks to NODEs:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 	tprintf("# ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 	while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 		int bind_node, bind_node_0, bind_node_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 		char *tok, *tok_end, *tok_step, *tok_mul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 		int step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 		int mul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 		tok = strsep(&str, ",");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 		if (!tok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 		tok_end = strstr(tok, "-");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 		dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 		if (!tok_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 			/* Single NODE specified: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 			bind_node_0 = bind_node_1 = atol(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 			/* NODE range specified (for example: "5-11"): */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 			bind_node_0 = atol(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 			bind_node_1 = atol(tok_end + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 		step = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 		tok_step = strstr(tok, "#");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 		if (tok_step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 			step = atol(tok_step + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 			BUG_ON(step <= 0 || step >= g->p.nr_nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 		/* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 		mul = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 		tok_mul = strstr(tok, "x");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 		if (tok_mul) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 			mul = atol(tok_mul + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 			BUG_ON(mul <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 		dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 		if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 			printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 			return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 		BUG_ON(bind_node_0 < 0 || bind_node_1 < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 		BUG_ON(bind_node_0 > bind_node_1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 		for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 			int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 			for (i = 0; i < mul; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 				if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 					printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 					goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 				td = g->threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 				if (!t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 					tprintf(" %2d", bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 				else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 					tprintf(",%2d", bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 				td->bind_node = bind_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 				t++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	tprintf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	if (t < g->p.nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 		printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 	free(str0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) static int parse_nodes_opt(const struct option *opt __maybe_unused,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 			  const char *arg, int unset __maybe_unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	if (!arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 	return parse_node_list(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 
/* The argument is parenthesized so that e.g. BIT(a | b) expands as intended: */
#define BIT(x) (1ul << (x))

/*
 * Advance a 32-bit linear feedback shift register by one step.
 *
 * The state is shifted right by one bit; if the bit that was shifted out
 * was set, the tap mask (bits 1, 5, 6 and 31) is XOR-ed into the result.
 * A state of 0 therefore stays 0.
 */
static inline uint32_t lfsr_32(uint32_t lfsr)
{
	const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
	return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760)  * Make sure there's real data dependency to RAM (when read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761)  * accesses are enabled), so the compiler, the CPU and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762)  * kernel (KSM, zero page, etc.) cannot optimize away RAM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763)  * accesses:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) static inline u64 access_data(u64 *data, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	if (g->p.data_reads)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 		val += *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	if (g->p.data_writes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 		*data = val + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 	return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775)  * The worker process does two types of work, a forwards going
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776)  * loop and a backwards going loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778)  * We do this so that on multiprocessor systems we do not create
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779)  * a 'train' of processing, with highly synchronized processes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780)  * skewing the whole benchmark.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	long words = bytes/sizeof(u64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	u64 *data = (void *)__data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	long chunk_0, chunk_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 	u64 *d0, *d, *d1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	long off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	BUG_ON(!data && words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	BUG_ON(data && !words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	if (!data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 		return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	/* Very simple memset() work variant: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	if (g->p.data_zero_memset && !g->p.data_rand_walk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 		bzero(data, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 		return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	/* Spread out by PID/TID nr and by loop nr: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	chunk_0 = words/nr_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	chunk_1 = words/g->p.nr_loops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	off = nr*chunk_0 + loop*chunk_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 	while (off >= words)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		off -= words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 	if (g->p.data_rand_walk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 		u32 lfsr = nr + loop + val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 		int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		for (i = 0; i < words/1024; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 			long start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 			lfsr = lfsr_32(lfsr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 			start = lfsr % words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 			end = min(start + 1024, words-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 			if (g->p.data_zero_memset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 				bzero(data + start, (end-start) * sizeof(u64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 				for (j = start; j < end; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 					val = access_data(data + j, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	} else if (!g->p.data_backwards || (nr + loop) & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 		/* Process data forwards: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 		d0 = data + off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 		d  = data + off + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		d1 = data + words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 		for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 			if (unlikely(d >= d1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 				d = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 			if (unlikely(d == d0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 			val = access_data(d, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 			d++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 		/* Process data backwards: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 		d0 = data + off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 		d  = data + off - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		d1 = data + words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 		for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 			if (unlikely(d < data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 				d = data + words-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 			if (unlikely(d == d0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 			val = access_data(d, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 			d--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	unsigned int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 	cpu = sched_getcpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	g->threads[task_nr].curr_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	prctl(0, bytes_worked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) #define MAX_NR_NODES	64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882)  * Count the number of nodes a process's threads
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883)  * are spread out on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885)  * A count of 1 means that the process is compressed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886)  * to a single node. A count of g->p.nr_nodes means it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887)  * spread out on the whole system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) static int count_process_nodes(int process_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	char node_present[MAX_NR_NODES] = { 0, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	int nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	int n, t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 	for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		int task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 		int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		task_nr = process_nr*g->p.nr_threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 		td = g->threads + task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 		node = numa_node_of_cpu(td->curr_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 		if (node < 0) /* curr_cpu was likely still -1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 			return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 		node_present[node] = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	nodes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	for (n = 0; n < MAX_NR_NODES; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		nodes += node_present[n];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	return nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919)  * Count the number of distinct process-threads a node contains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921)  * A count of 1 means that the node contains only a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922)  * process. If all nodes on the system contain at most one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923)  * process then we are well-converged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) static int count_node_processes(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	int processes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	int t, p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	for (p = 0; p < g->p.nr_proc; p++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 		for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 			struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 			int task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 			int n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 			task_nr = p*g->p.nr_threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 			td = g->threads + task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 			n = numa_node_of_cpu(td->curr_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 			if (n == node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 				processes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	return processes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) static void calc_convergence_compression(int *strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	unsigned int nodes_min, nodes_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	int p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	nodes_min = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 	nodes_max =  0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	for (p = 0; p < g->p.nr_proc; p++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		unsigned int nodes = count_process_nodes(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 		if (!nodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 			*strong = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 		nodes_min = min(nodes, nodes_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 		nodes_max = max(nodes, nodes_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 	/* Strong convergence: all threads compress on a single node: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 	if (nodes_min == 1 && nodes_max == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 		*strong = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 		*strong = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 		tprintf(" {%d-%d}", nodes_min, nodes_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) static void calc_convergence(double runtime_ns_max, double *convergence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	unsigned int loops_done_min, loops_done_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	int process_groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	int nodes[MAX_NR_NODES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	int distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	int nr_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	int nr_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	int strong;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	int sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 	if (!g->p.show_convergence && !g->p.measure_convergence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	for (node = 0; node < g->p.nr_nodes; node++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 		nodes[node] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	loops_done_min = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	loops_done_max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 	for (t = 0; t < g->p.nr_tasks; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 		struct thread_data *td = g->threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 		unsigned int loops_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 		cpu = td->curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 		/* Not all threads have written it yet: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 		if (cpu < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 		node = numa_node_of_cpu(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 		nodes[node]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 		loops_done = td->loops_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		loops_done_min = min(loops_done, loops_done_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 		loops_done_max = max(loops_done, loops_done_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	nr_max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	nr_min = g->p.nr_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	for (node = 0; node < g->p.nr_nodes; node++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 		if (!is_node_present(node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 		nr = nodes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 		nr_min = min(nr, nr_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 		nr_max = max(nr, nr_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 		sum += nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	BUG_ON(nr_min > nr_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	BUG_ON(sum > g->p.nr_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	if (0 && (sum < g->p.nr_tasks))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	 * Count the number of distinct process groups present
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	 * on nodes - when we are converged this will decrease
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	 * to g->p.nr_proc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	process_groups = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	for (node = 0; node < g->p.nr_nodes; node++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		int processes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 		if (!is_node_present(node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 		processes = count_node_processes(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 		nr = nodes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		tprintf(" %2d/%-2d", nr, processes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		process_groups += processes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	distance = nr_max - nr_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	tprintf(" [%2d/%-2d]", distance, process_groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	tprintf(" l:%3d-%-3d (%3d)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 		loops_done_min, loops_done_max, loops_done_max-loops_done_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	if (loops_done_min && loops_done_max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 		double skew = 1.0 - (double)loops_done_min/loops_done_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 		tprintf(" [%4.1f%%]", skew * 100.0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	calc_convergence_compression(&strong);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	if (strong && process_groups == g->p.nr_proc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 		if (!*convergence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 			*convergence = runtime_ns_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 			tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 			if (g->p.measure_convergence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 				g->all_converged = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 				g->stop_work = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 		if (*convergence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 			tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 			*convergence = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 		tprintf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) static void show_summary(double runtime_ns_max, int l, double *convergence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	tprintf("\r #  %5.1f%%  [%.1f mins]",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 		(double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	calc_convergence(runtime_ns_max, convergence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	if (g->p.show_details >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 		fflush(stdout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) static void *worker_thread(void *__tdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 	struct thread_data *td = __tdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	struct timeval start0, start, stop, diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	int process_nr = td->process_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	int thread_nr = td->thread_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	unsigned long last_perturbance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	int task_nr = td->task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	int details = g->p.show_details;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	int first_task, last_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	double convergence = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	u64 val = td->val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	double runtime_ns_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	u8 *global_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	u8 *process_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	u8 *thread_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	u64 bytes_done, secs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	long work_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	u32 l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	struct rusage rusage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	bind_to_cpumask(td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	bind_to_memnode(td->bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	set_taskname("thread %d/%d", process_nr, thread_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 	global_data = g->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	process_data = td->process_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	thread_data = setup_private_data(g->p.bytes_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	bytes_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	last_task = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 		last_task = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	first_task = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	if (process_nr == 0 && thread_nr == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 		first_task = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	if (details >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 		printf("#  thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 			process_nr, thread_nr, global_data, process_data, thread_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	if (g->p.serialize_startup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 		pthread_mutex_lock(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 		g->nr_tasks_started++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 		/* The last thread wakes the main process. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		if (g->nr_tasks_started == g->p.nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 			pthread_cond_signal(&g->startup_cond);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 		pthread_mutex_unlock(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 		/* Here we will wait for the main process to start us all at once: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 		pthread_mutex_lock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 		g->start_work = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 		g->nr_tasks_working++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 		while (!g->start_work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 			pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 		pthread_mutex_unlock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	gettimeofday(&start0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	start = stop = start0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	last_perturbance = start.tv_sec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	for (l = 0; l < g->p.nr_loops; l++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 		start = stop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 		if (g->stop_work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 		val += do_work(global_data,  g->p.bytes_global,  process_nr, g->p.nr_proc,	l, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 		val += do_work(process_data, g->p.bytes_process, thread_nr,  g->p.nr_threads,	l, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 		val += do_work(thread_data,  g->p.bytes_thread,  0,          1,		l, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 		if (g->p.sleep_usecs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 			pthread_mutex_lock(td->process_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 			usleep(g->p.sleep_usecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 			pthread_mutex_unlock(td->process_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 		 * Amount of work to be done under a process-global lock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		if (g->p.bytes_process_locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 			pthread_mutex_lock(td->process_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 			val += do_work(process_data, g->p.bytes_process_locked, thread_nr,  g->p.nr_threads,	l, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 			pthread_mutex_unlock(td->process_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 		work_done = g->p.bytes_global + g->p.bytes_process +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 			    g->p.bytes_process_locked + g->p.bytes_thread;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 		update_curr_cpu(task_nr, work_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 		bytes_done += work_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 		if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 		td->loops_done = l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 		gettimeofday(&stop, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 		/* Check whether our max runtime timed out: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 		if (g->p.nr_secs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 			timersub(&stop, &start0, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 			if ((u32)diff.tv_sec >= g->p.nr_secs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 				g->stop_work = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 		/* Update the summary at most once per second: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		if (start.tv_sec == stop.tv_sec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 		 * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 		 * by migrating to CPU#0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 			cpu_set_t orig_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 			int target_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 			int this_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 			last_perturbance = stop.tv_sec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 			 * Depending on where we are running, move into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 			 * the other half of the system, to create some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 			 * real disturbance:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 			this_cpu = g->threads[task_nr].curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 			if (this_cpu < g->p.nr_cpus/2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 				target_cpu = g->p.nr_cpus-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 				target_cpu = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 			orig_mask = bind_to_cpu(target_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 			/* Here we are running on the target CPU already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 			if (details >= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 				printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 			bind_to_cpumask(orig_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 		if (details >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 			timersub(&stop, &start, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 			runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 			runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 			if (details >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 				printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 					process_nr, thread_nr, runtime_ns_max / bytes_done, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 			fflush(stdout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		if (!last_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		timersub(&stop, &start0, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 		show_summary(runtime_ns_max, l, &convergence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	gettimeofday(&stop, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	timersub(&stop, &start0, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	secs = td->runtime_ns / NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	getrusage(RUSAGE_THREAD, &rusage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	free_data(thread_data, g->p.bytes_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	pthread_mutex_lock(&g->stop_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	g->bytes_done += bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	pthread_mutex_unlock(&g->stop_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298)  * A worker process starts a couple of threads:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) static void worker_process(int process_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	pthread_mutex_t process_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	pthread_t *pthreads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	u8 *process_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	int task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	pthread_mutex_init(&process_lock, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	set_taskname("process %d", process_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	 * Pick up the memory policy and the CPU binding of our first thread,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	 * so that we initialize memory accordingly:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	task_nr = process_nr*g->p.nr_threads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	td = g->threads + task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 	bind_to_memnode(td->bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	bind_to_cpumask(td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	process_data = setup_private_data(g->p.bytes_process);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	if (g->p.show_details >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 		printf(" # process %2d global mem: %p, process mem: %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 			process_nr, g->data, process_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 		task_nr = process_nr*g->p.nr_threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 		td = g->threads + task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 		td->process_data = process_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 		td->process_nr   = process_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 		td->thread_nr    = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 		td->task_nr	 = task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 		td->val          = rand();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 		td->curr_cpu	 = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 		td->process_lock = &process_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 		ret = pthread_create(pthreads + t, NULL, worker_thread, td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 		BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)                 ret = pthread_join(pthreads[t], NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 		BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	free_data(process_data, g->p.bytes_process);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	free(pthreads);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) static void print_summary(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	if (g->p.show_details < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 	printf("\n ###\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 		g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	printf(" #      %5dx %5ldMB global  shared mem operations\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 			g->p.nr_loops, g->p.bytes_global/1024/1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	printf(" #      %5dx %5ldMB process shared mem operations\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 			g->p.nr_loops, g->p.bytes_process/1024/1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	printf(" #      %5dx %5ldMB thread  local  mem operations\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 			g->p.nr_loops, g->p.bytes_thread/1024/1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	printf(" ###\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	printf("\n ###\n"); fflush(stdout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) static void init_thread_data(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 	int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	g->threads = zalloc_shared_data(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	for (t = 0; t < g->p.nr_tasks; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 		struct thread_data *td = g->threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 		/* Allow all nodes by default: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 		td->bind_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 		/* Allow all CPUs by default: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 		CPU_ZERO(&td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 		for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 			CPU_SET(cpu, &td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) static void deinit_thread_data(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	free_data(g->threads, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) static int init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	/* Copy over options: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	g->p = p0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	g->p.nr_cpus = numa_num_configured_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	g->p.nr_nodes = numa_max_node() + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 	/* char array in count_process_nodes(): */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 	if (g->p.show_quiet && !g->p.show_details)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 		g->p.show_details = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 	/* Some memory should be specified: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 	if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 	if (g->p.mb_global_str) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 		g->p.mb_global = atof(g->p.mb_global_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 		BUG_ON(g->p.mb_global < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	if (g->p.mb_proc_str) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 		g->p.mb_proc = atof(g->p.mb_proc_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 		BUG_ON(g->p.mb_proc < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 	if (g->p.mb_proc_locked_str) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 		g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 		BUG_ON(g->p.mb_proc_locked < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 		BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 	if (g->p.mb_thread_str) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 		g->p.mb_thread = atof(g->p.mb_thread_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 		BUG_ON(g->p.mb_thread < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 	BUG_ON(g->p.nr_threads <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	BUG_ON(g->p.nr_proc <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	g->p.bytes_global		= g->p.mb_global	*1024L*1024L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 	g->p.bytes_process		= g->p.mb_proc		*1024L*1024L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	g->p.bytes_process_locked	= g->p.mb_proc_locked	*1024L*1024L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 	g->p.bytes_thread		= g->p.mb_thread	*1024L*1024L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	g->data = setup_shared_data(g->p.bytes_global);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 	/* Startup serialization: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	init_global_mutex(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 	init_global_cond(&g->start_work_cond);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	init_global_mutex(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	init_global_cond(&g->startup_cond);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	init_global_mutex(&g->stop_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 	init_thread_data();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 	tprintf("#\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	if (parse_setup_cpu_list() || parse_setup_node_list())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	tprintf("#\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 	print_summary();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) static void deinit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	free_data(g->data, g->p.bytes_global);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 	g->data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 	deinit_thread_data();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 	free_data(g, sizeof(*g));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	g = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489)  * Print a short or long result, depending on the verbosity setting:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) static void print_res(const char *name, double val,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 		      const char *txt_unit, const char *txt_short, const char *txt_long)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	if (!name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 		name = "main,";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	if (!g->p.show_quiet)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 		printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 		printf(" %14.3f %s\n", val, txt_long);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) static int __bench_numa(const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	struct timeval start, stop, diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	u64 runtime_ns_min, runtime_ns_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	pid_t *pids, pid, wpid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	double delta_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	double runtime_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 	double runtime_sec_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	double runtime_sec_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	int wait_stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 	double bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	int i, t, p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 	if (init())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	pids = zalloc(g->p.nr_proc * sizeof(*pids));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	pid = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 	if (g->p.serialize_startup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 		tprintf(" #\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 		tprintf(" # Startup synchronization: ..."); fflush(stdout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	gettimeofday(&start, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	for (i = 0; i < g->p.nr_proc; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 		pid = fork();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 		dprintf(" # process %2d: PID %d\n", i, pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 		BUG_ON(pid < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 		if (!pid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 			/* Child process: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 			worker_process(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 			exit(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 		pids[i] = pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 	if (g->p.serialize_startup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 		bool threads_ready = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 		double startup_sec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 		 * Wait for all the threads to start up. The last thread will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 		 * signal this process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 		pthread_mutex_lock(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 		while (g->nr_tasks_started != g->p.nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 			pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 		pthread_mutex_unlock(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 		/* Wait for all threads to be at the start_work_cond. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 		while (!threads_ready) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 			pthread_mutex_lock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 			threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 			pthread_mutex_unlock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 			if (!threads_ready)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 				usleep(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 		gettimeofday(&stop, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 		timersub(&stop, &start, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 		startup_sec = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 		startup_sec += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 		startup_sec /= NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 		tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 		tprintf(" #\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 		start = stop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 		/* Start all threads running. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 		pthread_mutex_lock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 		g->start_work = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 		pthread_mutex_unlock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 		pthread_cond_broadcast(&g->start_work_cond);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 		gettimeofday(&start, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	/* Parent process: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 	for (i = 0; i < g->p.nr_proc; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 		wpid = waitpid(pids[i], &wait_stat, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 		BUG_ON(wpid < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 		BUG_ON(!WIFEXITED(wait_stat));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 	runtime_ns_sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 	runtime_ns_min = -1LL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	for (t = 0; t < g->p.nr_tasks; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 		u64 thread_runtime_ns = g->threads[t].runtime_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 		runtime_ns_sum += thread_runtime_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	gettimeofday(&stop, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	timersub(&stop, &start, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	tprintf("\n ###\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	tprintf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	runtime_sec_max = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 	runtime_sec_max += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	runtime_sec_max /= NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	runtime_sec_min = runtime_ns_min / NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	bytes = g->bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 	runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 	if (g->p.measure_convergence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 		print_res(name, runtime_sec_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 			"secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 	print_res(name, runtime_sec_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 		"secs,", "runtime-max/thread",	"secs slowest (max) thread-runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	print_res(name, runtime_sec_min,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 		"secs,", "runtime-min/thread",	"secs fastest (min) thread-runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 	print_res(name, runtime_avg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 		"secs,", "runtime-avg/thread",	"secs average thread-runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 	delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 	print_res(name, delta_runtime / runtime_sec_max * 100.0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 		"%,", "spread-runtime/thread",	"% difference between max/avg runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 	print_res(name, bytes / g->p.nr_tasks / 1e9,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 		"GB,", "data/thread",		"GB data processed, per thread");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	print_res(name, bytes / 1e9,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 		"GB,", "data-total",		"GB data processed, total");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 	print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 		"nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 	print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 		"GB/sec,", "thread-speed",	"GB/sec/thread speed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 	print_res(name, bytes / runtime_sec_max / 1e9,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 		"GB/sec,", "total-speed",	"GB/sec total speed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 	if (g->p.show_details >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 		char tname[14 + 2 * 10 + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 		struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 		for (p = 0; p < g->p.nr_proc; p++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 			for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 				memset(tname, 0, sizeof(tname));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 				td = g->threads + p*g->p.nr_threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 				snprintf(tname, sizeof(tname), "process%d:thread%d", p, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 				print_res(tname, td->speed_gbs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 					"GB/sec",	"thread-speed", "GB/sec/thread speed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 				print_res(tname, td->system_time_ns / NSEC_PER_SEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 					"secs",	"thread-system-time", "system CPU time/thread");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 				print_res(tname, td->user_time_ns / NSEC_PER_SEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 					"secs",	"thread-user-time", "user CPU time/thread");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 	free(pids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	deinit();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 
/* Upper bound on the number of entries in one argument vector: */
#define MAX_ARGS 50

/*
 * Count the entries of a NULL-terminated argument vector.
 * The count must stay below MAX_ARGS.
 */
static int command_size(const char **argv)
{
	int nr_args;

	for (nr_args = 0; argv[nr_args]; nr_args++)
		;

	BUG_ON(nr_args >= MAX_ARGS);

	return nr_args;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) static void init_params(struct params *p, const char *name, int argc, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 	printf("\n # Running %s \"perf bench numa", name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	for (i = 0; i < argc; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 		printf(" %s", argv[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 	printf("\"\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 	memset(p, 0, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	/* Initialize nonzero defaults: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 	p->serialize_startup		= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 	p->data_reads			= true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 	p->data_writes			= true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	p->data_backwards		= true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	p->data_rand_walk		= true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	p->nr_loops			= -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	p->init_random			= true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	p->mb_global_str		= "1";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	p->nr_proc			= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	p->nr_threads			= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	p->nr_secs			= 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	p->run_all			= argc == 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) static int run_bench_numa(const char *name, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 	int argc = command_size(argv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	init_params(&p0, name, argc, argv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 	argc = parse_options(argc, argv, options, bench_numa_usage, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 	if (argc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 	if (__bench_numa(name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 
/*
 * Shared option tails for the built-in test-suite entries. Each macro
 * expands to a comma-separated list of argument strings. The *_NOTHP
 * variants are the base list followed by a second "--thp", "-1" pair.
 */
#define OPT_THP_ON		"--thp", " 1"
#define OPT_THP_OFF		"--thp", "-1"

#define OPT_BW_RAM		"-s",  "20", "-zZq", OPT_THP_ON, "--no-data_rand_walk"
#define OPT_BW_RAM_NOTHP	OPT_BW_RAM, OPT_THP_OFF

#define OPT_CONV		"-s", "100", "-zZ0qcm", OPT_THP_ON
#define OPT_CONV_NOTHP		OPT_CONV, OPT_THP_OFF

#define OPT_BW			"-s",  "20", "-zZ0q", OPT_THP_ON
#define OPT_BW_NOTHP		OPT_BW, OPT_THP_OFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756)  * The built-in test-suite executed by "perf bench numa -a".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758)  * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) static const char *tests[][MAX_ARGS] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761)    /* Basic single-stream NUMA bandwidth measurements: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762)    { "RAM-bw-local,",     "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)    { "RAM-bw-local-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 			  "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 			  "-C" ,   "0", "-M",   "0", OPT_BW_RAM_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767)    { "RAM-bw-remote,",    "mem",  "-p",  "1",  "-t",  "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 			  "-C" ,   "0", "-M",   "1", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)    /* 2-stream NUMA bandwidth measurements: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)    { "RAM-bw-local-2x,",  "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 			   "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773)    { "RAM-bw-remote-2x,", "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 		 	   "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776)    /* Cross-stream NUMA bandwidth measurement: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777)    { "RAM-bw-cross,",     "mem",  "-p",  "2",  "-t",  "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 		 	   "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780)    /* Convergence latency measurements: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)    { " 1x3-convergence,", "mem",  "-p",  "1", "-t",  "3", "-P",  "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782)    { " 1x4-convergence,", "mem",  "-p",  "1", "-t",  "4", "-P",  "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)    { " 1x6-convergence,", "mem",  "-p",  "1", "-t",  "6", "-P", "1020", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784)    { " 2x3-convergence,", "mem",  "-p",  "2", "-t",  "3", "-P", "1020", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785)    { " 3x3-convergence,", "mem",  "-p",  "3", "-t",  "3", "-P", "1020", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)    { " 4x4-convergence,", "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787)    { " 4x4-convergence-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 			  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789)    { " 4x6-convergence,", "mem",  "-p",  "4", "-t",  "6", "-P", "1020", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790)    { " 4x8-convergence,", "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791)    { " 8x4-convergence,", "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792)    { " 8x4-convergence-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 			  "mem",  "-p",  "8", "-t",  "4", "-P",  "512", OPT_CONV_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)    { " 3x1-convergence,", "mem",  "-p",  "3", "-t",  "1", "-P",  "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795)    { " 4x1-convergence,", "mem",  "-p",  "4", "-t",  "1", "-P",  "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796)    { " 8x1-convergence,", "mem",  "-p",  "8", "-t",  "1", "-P",  "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797)    { "16x1-convergence,", "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)    { "32x1-convergence,", "mem",  "-p", "32", "-t",  "1", "-P",  "128", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)    /* Various NUMA process/thread layout bandwidth measurements: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)    { " 2x1-bw-process,",  "mem",  "-p",  "2", "-t",  "1", "-P", "1024", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802)    { " 3x1-bw-process,",  "mem",  "-p",  "3", "-t",  "1", "-P", "1024", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803)    { " 4x1-bw-process,",  "mem",  "-p",  "4", "-t",  "1", "-P", "1024", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804)    { " 8x1-bw-process,",  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805)    { " 8x1-bw-process-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 			  "mem",  "-p",  "8", "-t",  "1", "-P", " 512", OPT_BW_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807)    { "16x1-bw-process,",  "mem",  "-p", "16", "-t",  "1", "-P",  "256", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809)    { " 1x4-bw-thread,",   "mem",  "-p",  "1", "-t",  "4", "-T",  "256", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)    { " 1x8-bw-thread,",   "mem",  "-p",  "1", "-t",  "8", "-T",  "256", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811)    { "1x16-bw-thread,",   "mem",  "-p",  "1", "-t", "16", "-T",  "128", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)    { "1x32-bw-thread,",   "mem",  "-p",  "1", "-t", "32", "-T",   "64", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814)    { " 2x3-bw-process,",  "mem",  "-p",  "2", "-t",  "3", "-P",  "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)    { " 4x4-bw-process,",  "mem",  "-p",  "4", "-t",  "4", "-P",  "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816)    { " 4x6-bw-process,",  "mem",  "-p",  "4", "-t",  "6", "-P",  "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817)    { " 4x8-bw-process,",  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818)    { " 4x8-bw-process-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 			  "mem",  "-p",  "4", "-t",  "8", "-P",  "512", OPT_BW_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820)    { " 3x3-bw-process,",  "mem",  "-p",  "3", "-t",  "3", "-P",  "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821)    { " 5x5-bw-process,",  "mem",  "-p",  "5", "-t",  "5", "-P",  "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823)    { "2x16-bw-process,",  "mem",  "-p",  "2", "-t", "16", "-P",  "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824)    { "1x32-bw-process,",  "mem",  "-p",  "1", "-t", "32", "-P", "2048", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826)    { "numa02-bw,",        "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827)    { "numa02-bw-NOTHP,",  "mem",  "-p",  "1", "-t", "32", "-T",   "32", OPT_BW_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828)    { "numa01-bw-thread,", "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829)    { "numa01-bw-thread-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 			  "mem",  "-p",  "2", "-t", "16", "-T",  "192", OPT_BW_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) static int bench_all(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 	int nr = ARRAY_SIZE(tests);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 	ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 	BUG_ON(ret < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 	for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 		run_bench_numa(tests[i][0], tests[i] + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	printf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) int bench_numa(int argc, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 	init_params(&p0, "main,", argc, argv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 	argc = parse_options(argc, argv, options, bench_numa_usage, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 	if (argc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	if (p0.run_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 		return bench_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	if (__bench_numa(NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 		goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 	usage_with_options(numa_usage, options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) }