^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * numa.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <inttypes.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) /* For the CLR_() macros */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <pthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <subcmd/parse-options.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include "../util/cloexec.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "bench.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <stdio.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <assert.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <malloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <stdlib.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include <unistd.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include <sys/mman.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <sys/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <sys/resource.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <sys/wait.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <sys/prctl.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include <sys/types.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #include <linux/time64.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include <linux/numa.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include <linux/zalloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #include <numa.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #include <numaif.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
/*
 * Fallback definition for C library headers that do not provide
 * RUSAGE_THREAD.
 * NOTE(review): 1 is presumably the kernel's value for this constant - confirm.
 */
#ifndef RUSAGE_THREAD
# define RUSAGE_THREAD	1
#endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43)
/*
 * Regular printout to the terminal: emitted only once the global state 'g'
 * exists and g->p.show_details is >= 0.
 *
 * NOTE(review): this is meant to be suppressed when -q is specified; the code
 * that turns -q into a negative show_details is not visible here - confirm.
 */
#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
/*
 * Debug printf: emitted only once the global state 'g' exists and
 * g->p.show_details is >= 1.
 *
 * Any previous definition of dprintf is dropped first, so within this file
 * the name always refers to this macro.
 */
#undef dprintf
#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
/*
 * Per-task bookkeeping record. struct global_info keeps a pointer to these
 * in its 'threads' member.
 *
 * NOTE(review): the code that fills and reads these fields is outside this
 * part of the file; the field notes below are inferred from the names and
 * should be confirmed against the worker code.
 */
struct thread_data {
	int			curr_cpu;
	cpu_set_t		bind_cpumask;	/* presumably the CPU affinity requested for this task */
	int			bind_node;	/* presumably the memory node requested for this task */
	u8			*process_data;
	int			process_nr;
	int			thread_nr;
	int			task_nr;
	unsigned int		loops_done;
	u64			val;
	u64			runtime_ns;
	u64			system_time_ns;
	u64			user_time_ns;
	double			speed_gbs;
	pthread_mutex_t		*process_lock;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
/*
 * Parameters set by options.
 *
 * The file-scope instance 'p0' is the target of the options[] table; a
 * further copy is embedded in struct global_info as 'p', which is what the
 * helpers in this file read through g->p.
 */

struct params {
	/* Startup synchronization: */
	bool			serialize_startup;

	/* Task hierarchy: */
	int			nr_proc;
	int			nr_threads;

	/* Working set sizes, as given on the command line: */
	const char		*mb_global_str;
	const char		*mb_proc_str;
	const char		*mb_proc_locked_str;
	const char		*mb_thread_str;

	/* NOTE(review): presumably the parsed values of the strings above - confirm */
	double			mb_global;
	double			mb_proc;
	double			mb_proc_locked;
	double			mb_thread;

	/* Access patterns to the working set: */
	bool			data_reads;
	bool			data_writes;
	bool			data_backwards;
	bool			data_zero_memset;
	bool			data_rand_walk;
	u32			nr_loops;
	u32			nr_secs;
	u32			sleep_usecs;

	/* Working set initialization: */
	bool			init_zero;
	bool			init_random;
	bool			init_cpu0;

	/* Misc options: */
	int			show_details;	/* verbosity level tested by tprintf()/dprintf() */
	int			run_all;
	int			thp;		/* > 0: MADV_HUGEPAGE, < 0: MADV_NOHUGEPAGE (see alloc_data()) */

	/* NOTE(review): presumably the mb_* sizes above converted to bytes - confirm */
	long			bytes_global;
	long			bytes_process;
	long			bytes_process_locked;
	long			bytes_thread;

	int			nr_tasks;
	bool			show_quiet;

	bool			show_convergence;
	bool			measure_convergence;

	int			perturb_secs;
	int			nr_cpus;	/* upper bound of the CPU loops in bind_to_cpu()/bind_to_node() */
	int			nr_nodes;	/* upper bound of the node loop in nr_numa_nodes() */

	/* Affinity options -C and -M: */
	char			*cpu_list_str;
	char			*node_list_str;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
/*
 * Global, read-writable area, accessible to all processes and threads.
 *
 * It is reached through the file-scope pointer 'g'.
 * NOTE(review): the allocation of this structure is not visible here;
 * presumably it lives in a MAP_SHARED mapping so that forked workers see
 * the same state - confirm.
 */

struct global_info {
	u8			*data;

	/* Startup rendezvous: */
	pthread_mutex_t		startup_mutex;
	pthread_cond_t		startup_cond;
	int			nr_tasks_started;

	/* Start-of-work rendezvous: */
	pthread_mutex_t		start_work_mutex;
	pthread_cond_t		start_work_cond;
	int			nr_tasks_working;
	bool			start_work;

	pthread_mutex_t		stop_work_mutex;
	u64			bytes_done;

	struct thread_data	*threads;

	/* Convergence latency measurement: */
	bool			all_converged;
	bool			stop_work;

	/* Set once the THP warning in alloc_data() has been printed: */
	int			print_once;

	struct params		p;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
/* Global benchmark state; NULL until set up (tprintf()/dprintf() check for that). */
static struct global_info	*g = NULL;

/* Option callbacks, referenced by the options[] table and defined later in the file: */
static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);

/* Parameter block that the options[] table writes into: */
struct params p0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
/*
 * Command line option table. Every plain option stores straight into a
 * member of 'p0'; the two affinity options (-C, -M) have no value pointer
 * and are handled by their callbacks instead.
 */
static const struct option options[] = {
	/* Task hierarchy: */
	OPT_INTEGER('p', "nr_proc"	, &p0.nr_proc,		"number of processes"),
	OPT_INTEGER('t', "nr_threads"	, &p0.nr_threads,	"number of threads per process"),

	/* Working set sizes (kept as strings here): */
	OPT_STRING('G', "mb_global"	, &p0.mb_global_str,	"MB", "global  memory (MBs)"),
	OPT_STRING('P', "mb_proc"	, &p0.mb_proc_str,	"MB", "process memory (MBs)"),
	OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
	OPT_STRING('T', "mb_thread"	, &p0.mb_thread_str,	"MB", "thread  memory (MBs)"),

	/* Run length: */
	OPT_UINTEGER('l', "nr_loops"	, &p0.nr_loops,		"max number of loops to run (default: unlimited)"),
	OPT_UINTEGER('s', "nr_secs"	, &p0.nr_secs,		"max number of seconds to run (default: 5 secs)"),
	OPT_UINTEGER('u', "usleep"	, &p0.sleep_usecs,	"usecs to sleep per loop iteration"),

	/* Access patterns: */
	OPT_BOOLEAN('R', "data_reads"	, &p0.data_reads,	"access the data via reads (can be mixed with -W)"),
	OPT_BOOLEAN('W', "data_writes"	, &p0.data_writes,	"access the data via writes (can be mixed with -R)"),
	OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards,	"access the data backwards as well"),
	OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
	OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk,	"access the data with random (32bit LFSR) walk"),


	/* Working set initialization: */
	OPT_BOOLEAN('z', "init_zero"	, &p0.init_zero,	"bzero the initial allocations"),
	OPT_BOOLEAN('I', "init_random"	, &p0.init_random,	"randomize the contents of the initial allocations"),
	OPT_BOOLEAN('0', "init_cpu0"	, &p0.init_cpu0,	"do the initial allocations on CPU#0"),
	OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs,	"perturb thread 0/0 every X secs, to test convergence stability"),

	/* Misc options: */
	OPT_INCR   ('d', "show_details"	, &p0.show_details,	"Show details"),
	OPT_INCR   ('a', "all"		, &p0.run_all,		"Run all tests in the suite"),
	OPT_INTEGER('H', "thp"		, &p0.thp,		"MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
	OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, "
		    "convergence is reached when each process (all its threads) is running on a single NUMA node."),
	OPT_BOOLEAN('m', "measure_convergence",	&p0.measure_convergence, "measure convergence latency"),
	OPT_BOOLEAN('q', "quiet"	, &p0.show_quiet,	"quiet mode"),
	OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),

	/* Special option string parsing callbacks: */
	OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
			"bind the first N tasks to these specific cpus (the rest is unbound)",
			parse_cpus_opt),
	OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
			"bind the first N tasks to these specific memory nodes (the rest is unbound)",
			parse_nodes_opt),
	OPT_END()
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
/* NULL-terminated usage lines for the 'perf bench numa' command itself. */
static const char * const bench_numa_usage[] = {
	"perf bench numa <options>",
	NULL
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
/* NULL-terminated usage lines for the 'perf bench numa mem' sub-benchmark. */
static const char * const numa_usage[] = {
	"perf bench numa mem [<options>]",
	NULL
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * To get number of numa nodes present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) static int nr_numa_nodes(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) int i, nr_nodes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) for (i = 0; i < g->p.nr_nodes; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) if (numa_bitmask_isbitset(numa_nodes_ptr, i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) nr_nodes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) return nr_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) * To check if given numa node is present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) static int is_node_present(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) return numa_bitmask_isbitset(numa_nodes_ptr, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * To check given numa node has cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) static bool node_has_cpus(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) struct bitmask *cpumask = numa_allocate_cpumask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) bool ret = false; /* fall back to nocpus */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) BUG_ON(!cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) if (!numa_node_to_cpus(node, cpumask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) if (numa_bitmask_isbitset(cpumask, cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) numa_free_cpumask(cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) static cpu_set_t bind_to_cpu(int target_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) cpu_set_t orig_mask, mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) CPU_ZERO(&mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) if (target_cpu == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) CPU_SET(cpu, &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) CPU_SET(target_cpu, &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) ret = sched_setaffinity(0, sizeof(mask), &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) return orig_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) static cpu_set_t bind_to_node(int target_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) cpu_set_t orig_mask, mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) CPU_ZERO(&mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) if (target_node == NUMA_NO_NODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) CPU_SET(cpu, &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) struct bitmask *cpumask = numa_allocate_cpumask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) BUG_ON(!cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) if (!numa_node_to_cpus(target_node, cpumask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) for (cpu = 0; cpu < (int)cpumask->size; cpu++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) if (numa_bitmask_isbitset(cpumask, cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) CPU_SET(cpu, &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) numa_free_cpumask(cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) ret = sched_setaffinity(0, sizeof(mask), &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) return orig_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) static void bind_to_cpumask(cpu_set_t mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) ret = sched_setaffinity(0, sizeof(mask), &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) static void mempol_restore(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) static void bind_to_memnode(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) unsigned long nodemask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) if (node == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) nodemask = 1L << node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
/* 2MB: extra mapping length and alignment granule used by alloc_data() */
#define HPSIZE (2*1024*1024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
/*
 * Format a printf-style name into a local 20 byte buffer (snprintf()
 * truncates longer results) and pass it to prctl(PR_SET_NAME).
 * The prctl() return value is ignored.
 */
#define set_taskname(fmt...)				\
do {							\
	char name[20];					\
							\
	snprintf(name, 20, fmt);			\
	prctl(PR_SET_NAME, name);			\
} while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) static u8 *alloc_data(ssize_t bytes0, int map_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) int init_zero, int init_cpu0, int thp, int init_random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) cpu_set_t orig_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) ssize_t bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) u8 *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) if (!bytes0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) /* Allocate and initialize all memory on CPU#0: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) if (init_cpu0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) int node = numa_node_of_cpu(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) orig_mask = bind_to_node(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) bind_to_memnode(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) bytes = bytes0 + HPSIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) BUG_ON(buf == (void *)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) if (map_flags == MAP_PRIVATE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) if (thp > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) ret = madvise(buf, bytes, MADV_HUGEPAGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) if (ret && !g->print_once) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) g->print_once = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) if (thp < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) if (ret && !g->print_once) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) g->print_once = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) if (init_zero) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) bzero(buf, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) /* Initialize random contents, different in each word: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) if (init_random) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) u64 *wbuf = (void *)buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) long off = rand();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) for (i = 0; i < bytes/8; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) wbuf[i] = i + off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) /* Align to 2MB boundary: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) /* Restore affinity: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) if (init_cpu0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) bind_to_cpumask(orig_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) mempol_restore();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) return buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) static void free_data(void *data, ssize_t bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) if (!data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) ret = munmap(data, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) * Create a shared memory buffer that can be shared between processes, zeroed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) static void * zalloc_shared_data(ssize_t bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) * Create a shared memory buffer that can be shared between processes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) static void * setup_shared_data(ssize_t bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) * Allocate process-local memory - this will either be shared between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) * threads of this process, or only be accessed by this thread:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) static void * setup_private_data(ssize_t bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474)
/*
 * Initialize @mutex as a process-shared (global) mutex.
 *
 * The mutex is set up with the PTHREAD_PROCESS_SHARED attribute. The
 * temporary attribute object is destroyed again once the mutex has been
 * initialized; destroying it does not affect the mutex.
 */
static void init_global_mutex(pthread_mutex_t *mutex)
{
	pthread_mutexattr_t attr;

	pthread_mutexattr_init(&attr);
	pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
	pthread_mutex_init(mutex, &attr);
	pthread_mutexattr_destroy(&attr);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486)
/*
 * Initialize @cond as a process-shared (global) condition variable.
 *
 * The condition variable is set up with the PTHREAD_PROCESS_SHARED
 * attribute. The temporary attribute object is destroyed again once the
 * condition variable has been initialized.
 */
static void init_global_cond(pthread_cond_t *cond)
{
	pthread_condattr_t attr;

	pthread_condattr_init(&attr);
	pthread_condattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
	pthread_cond_init(cond, &attr);
	pthread_condattr_destroy(&attr);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) static int parse_cpu_list(const char *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) p0.cpu_list_str = strdup(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) static int parse_setup_cpu_list(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) char *str0, *str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) if (!g->p.cpu_list_str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) str0 = str = strdup(g->p.cpu_list_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) t = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) BUG_ON(!str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) tprintf("# binding tasks to CPUs:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) tprintf("# ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) int bind_cpu, bind_cpu_0, bind_cpu_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) int bind_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) int step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) int mul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) tok = strsep(&str, ",");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) if (!tok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) tok_end = strstr(tok, "-");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) if (!tok_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) /* Single CPU specified: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) bind_cpu_0 = bind_cpu_1 = atol(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) /* CPU range specified (for example: "5-11"): */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) bind_cpu_0 = atol(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) bind_cpu_1 = atol(tok_end + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) step = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) tok_step = strstr(tok, "#");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) if (tok_step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) step = atol(tok_step + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) BUG_ON(step <= 0 || step >= g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) * Mask length.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) * where the _4 means the next 4 CPUs are allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) bind_len = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) tok_len = strstr(tok, "_");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) if (tok_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) bind_len = atol(tok_len + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) mul = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) tok_mul = strstr(tok, "x");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) if (tok_mul) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) mul = atol(tok_mul + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) BUG_ON(mul <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) BUG_ON(bind_cpu_0 > bind_cpu_1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) for (i = 0; i < mul; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) if (t >= g->p.nr_tasks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) td = g->threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) if (t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) tprintf(",");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) if (bind_len > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) tprintf("%2d/%d", bind_cpu, bind_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) tprintf("%2d", bind_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) CPU_ZERO(&td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) CPU_SET(cpu, &td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) t++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) tprintf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) if (t < g->p.nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) free(str0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) static int parse_cpus_opt(const struct option *opt __maybe_unused,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) const char *arg, int unset __maybe_unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) if (!arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) return parse_cpu_list(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) static int parse_node_list(const char *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) p0.node_list_str = strdup(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) dprintf("got NODE list: {%s}\n", p0.node_list_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) static int parse_setup_node_list(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) char *str0, *str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) if (!g->p.node_list_str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) str0 = str = strdup(g->p.node_list_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) t = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) BUG_ON(!str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) tprintf("# binding tasks to NODEs:\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) tprintf("# ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) int bind_node, bind_node_0, bind_node_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) char *tok, *tok_end, *tok_step, *tok_mul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) int step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) int mul;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) tok = strsep(&str, ",");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) if (!tok)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) tok_end = strstr(tok, "-");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) if (!tok_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) /* Single NODE specified: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) bind_node_0 = bind_node_1 = atol(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) /* NODE range specified (for example: "5-11"): */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) bind_node_0 = atol(tok);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) bind_node_1 = atol(tok_end + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) step = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) tok_step = strstr(tok, "#");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) if (tok_step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) step = atol(tok_step + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) BUG_ON(step <= 0 || step >= g->p.nr_nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) mul = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) tok_mul = strstr(tok, "x");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) if (tok_mul) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) mul = atol(tok_mul + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) BUG_ON(mul <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) BUG_ON(bind_node_0 < 0 || bind_node_1 < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) BUG_ON(bind_node_0 > bind_node_1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) for (i = 0; i < mul; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) if (t >= g->p.nr_tasks || !node_has_cpus(bind_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) td = g->threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) if (!t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) tprintf(" %2d", bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) tprintf(",%2d", bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) td->bind_node = bind_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) t++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) tprintf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) if (t < g->p.nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) free(str0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) static int parse_nodes_opt(const struct option *opt __maybe_unused,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) const char *arg, int unset __maybe_unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) if (!arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) return parse_node_list(arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750)
/* Single-bit mask; the argument is parenthesized so expressions expand safely. */
#define BIT(x) (1ul << (x))

/*
 * Advance a 32-bit Galois LFSR by one step and return the new state.
 *
 * The state is shifted right by one; if the bit shifted out was set, the
 * tap mask (bits 1, 5, 6 and 31) is XORed in. The all-zero state maps to
 * itself.
 */
static inline uint32_t lfsr_32(uint32_t lfsr)
{
	const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);

	/* (0 - lsb) is all-ones when the low bit is set, zero otherwise. */
	return (lfsr >> 1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) * Make sure there's real data dependency to RAM (when read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) * accesses are enabled), so the compiler, the CPU and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) * kernel (KSM, zero page, etc.) cannot optimize away RAM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) * accesses:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) static inline u64 access_data(u64 *data, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) if (g->p.data_reads)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) val += *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) if (g->p.data_writes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) *data = val + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) * The worker process does two types of work, a forwards going
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) * loop and a backwards going loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * We do this so that on multiprocessor systems we do not create
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * a 'train' of processing, with highly synchronized processes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * skewing the whole benchmark.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) long words = bytes/sizeof(u64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) u64 *data = (void *)__data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) long chunk_0, chunk_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) u64 *d0, *d, *d1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) long off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) long i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) BUG_ON(!data && words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) BUG_ON(data && !words);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) if (!data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) /* Very simple memset() work variant: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) if (g->p.data_zero_memset && !g->p.data_rand_walk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) bzero(data, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) /* Spread out by PID/TID nr and by loop nr: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) chunk_0 = words/nr_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) chunk_1 = words/g->p.nr_loops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) off = nr*chunk_0 + loop*chunk_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) while (off >= words)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) off -= words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) if (g->p.data_rand_walk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) u32 lfsr = nr + loop + val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) for (i = 0; i < words/1024; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) long start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) lfsr = lfsr_32(lfsr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) start = lfsr % words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) end = min(start + 1024, words-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) if (g->p.data_zero_memset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) bzero(data + start, (end-start) * sizeof(u64));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) for (j = start; j < end; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) val = access_data(data + j, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) } else if (!g->p.data_backwards || (nr + loop) & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) /* Process data forwards: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) d0 = data + off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) d = data + off + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) d1 = data + words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) if (unlikely(d >= d1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) d = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) if (unlikely(d == d0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) val = access_data(d, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) d++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) /* Process data backwards: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) d0 = data + off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) d = data + off - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) d1 = data + words;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) if (unlikely(d < data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) d = data + words-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) if (unlikely(d == d0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) val = access_data(d, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) d--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) unsigned int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) cpu = sched_getcpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) g->threads[task_nr].curr_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) prctl(0, bytes_worked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) #define MAX_NR_NODES 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * Count the number of nodes a process's threads
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * are spread out on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) * A count of 1 means that the process is compressed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * to a single node. A count of g->p.nr_nodes means it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * spread out on the whole system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) static int count_process_nodes(int process_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) char node_present[MAX_NR_NODES] = { 0, };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) int nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) int n, t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) int task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) task_nr = process_nr*g->p.nr_threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) td = g->threads + task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) node = numa_node_of_cpu(td->curr_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) if (node < 0) /* curr_cpu was likely still -1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) node_present[node] = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) nodes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) for (n = 0; n < MAX_NR_NODES; n++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) nodes += node_present[n];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) return nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) * Count the number of distinct process-threads a node contains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) * A count of 1 means that the node contains only a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * process. If all nodes on the system contain at most one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) * process then we are well-converged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) static int count_node_processes(int node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) int processes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) int t, p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) for (p = 0; p < g->p.nr_proc; p++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) int task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) int n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) task_nr = p*g->p.nr_threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) td = g->threads + task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) n = numa_node_of_cpu(td->curr_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) if (n == node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) processes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) return processes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) static void calc_convergence_compression(int *strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) unsigned int nodes_min, nodes_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) int p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) nodes_min = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) nodes_max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) for (p = 0; p < g->p.nr_proc; p++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) unsigned int nodes = count_process_nodes(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) if (!nodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) *strong = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) nodes_min = min(nodes, nodes_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) nodes_max = max(nodes, nodes_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) /* Strong convergence: all threads compress on a single node: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) if (nodes_min == 1 && nodes_max == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) *strong = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) *strong = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) tprintf(" {%d-%d}", nodes_min, nodes_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) static void calc_convergence(double runtime_ns_max, double *convergence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) unsigned int loops_done_min, loops_done_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) int process_groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) int nodes[MAX_NR_NODES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) int distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) int nr_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) int nr_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) int strong;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) int sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) if (!g->p.show_convergence && !g->p.measure_convergence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) for (node = 0; node < g->p.nr_nodes; node++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) nodes[node] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) loops_done_min = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) loops_done_max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) for (t = 0; t < g->p.nr_tasks; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) struct thread_data *td = g->threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) unsigned int loops_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) cpu = td->curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) /* Not all threads have written it yet: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) if (cpu < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) node = numa_node_of_cpu(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) nodes[node]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) loops_done = td->loops_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) loops_done_min = min(loops_done, loops_done_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) loops_done_max = max(loops_done, loops_done_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) nr_max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) nr_min = g->p.nr_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) for (node = 0; node < g->p.nr_nodes; node++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) if (!is_node_present(node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) nr = nodes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) nr_min = min(nr, nr_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) nr_max = max(nr, nr_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) sum += nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) BUG_ON(nr_min > nr_max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) BUG_ON(sum > g->p.nr_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) if (0 && (sum < g->p.nr_tasks))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) * Count the number of distinct process groups present
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * on nodes - when we are converged this will decrease
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) * to g->p.nr_proc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) process_groups = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) for (node = 0; node < g->p.nr_nodes; node++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) int processes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) if (!is_node_present(node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) processes = count_node_processes(node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) nr = nodes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) tprintf(" %2d/%-2d", nr, processes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) process_groups += processes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) distance = nr_max - nr_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) tprintf(" [%2d/%-2d]", distance, process_groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) tprintf(" l:%3d-%-3d (%3d)",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) loops_done_min, loops_done_max, loops_done_max-loops_done_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) if (loops_done_min && loops_done_max) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) double skew = 1.0 - (double)loops_done_min/loops_done_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) tprintf(" [%4.1f%%]", skew * 100.0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) calc_convergence_compression(&strong);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) if (strong && process_groups == g->p.nr_proc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) if (!*convergence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) *convergence = runtime_ns_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) tprintf(" (%6.1fs converged)\n", *convergence / NSEC_PER_SEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (g->p.measure_convergence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) g->all_converged = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) g->stop_work = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) if (*convergence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) tprintf(" (%6.1fs de-converged)", runtime_ns_max / NSEC_PER_SEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) *convergence = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) tprintf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) static void show_summary(double runtime_ns_max, int l, double *convergence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) tprintf("\r # %5.1f%% [%.1f mins]",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max / NSEC_PER_SEC / 60.0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) calc_convergence(runtime_ns_max, convergence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) if (g->p.show_details >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) fflush(stdout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) static void *worker_thread(void *__tdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) struct thread_data *td = __tdata;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) struct timeval start0, start, stop, diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) int process_nr = td->process_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) int thread_nr = td->thread_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) unsigned long last_perturbance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) int task_nr = td->task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) int details = g->p.show_details;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) int first_task, last_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) double convergence = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) u64 val = td->val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) double runtime_ns_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) u8 *global_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) u8 *process_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) u8 *thread_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) u64 bytes_done, secs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) long work_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) u32 l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) struct rusage rusage;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) bind_to_cpumask(td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) bind_to_memnode(td->bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) set_taskname("thread %d/%d", process_nr, thread_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) global_data = g->data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) process_data = td->process_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) thread_data = setup_private_data(g->p.bytes_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) bytes_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) last_task = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) last_task = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) first_task = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) if (process_nr == 0 && thread_nr == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) first_task = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) if (details >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) process_nr, thread_nr, global_data, process_data, thread_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) if (g->p.serialize_startup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) pthread_mutex_lock(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) g->nr_tasks_started++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) /* The last thread wakes the main process. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) if (g->nr_tasks_started == g->p.nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) pthread_cond_signal(&g->startup_cond);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) pthread_mutex_unlock(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) /* Here we will wait for the main process to start us all at once: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) pthread_mutex_lock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) g->start_work = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) g->nr_tasks_working++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) while (!g->start_work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) pthread_cond_wait(&g->start_work_cond, &g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) pthread_mutex_unlock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) gettimeofday(&start0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) start = stop = start0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) last_perturbance = start.tv_sec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) for (l = 0; l < g->p.nr_loops; l++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) start = stop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) if (g->stop_work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) if (g->p.sleep_usecs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) pthread_mutex_lock(td->process_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) usleep(g->p.sleep_usecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) pthread_mutex_unlock(td->process_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) * Amount of work to be done under a process-global lock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) if (g->p.bytes_process_locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) pthread_mutex_lock(td->process_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) pthread_mutex_unlock(td->process_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) work_done = g->p.bytes_global + g->p.bytes_process +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) g->p.bytes_process_locked + g->p.bytes_thread;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) update_curr_cpu(task_nr, work_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) bytes_done += work_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) td->loops_done = l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) gettimeofday(&stop, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) /* Check whether our max runtime timed out: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) if (g->p.nr_secs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) timersub(&stop, &start0, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) if ((u32)diff.tv_sec >= g->p.nr_secs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) g->stop_work = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) /* Update the summary at most once per second: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) if (start.tv_sec == stop.tv_sec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) * by migrating to CPU#0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) cpu_set_t orig_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) int target_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) int this_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) last_perturbance = stop.tv_sec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) * Depending on where we are running, move into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) * the other half of the system, to create some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) * real disturbance:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) this_cpu = g->threads[task_nr].curr_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) if (this_cpu < g->p.nr_cpus/2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) target_cpu = g->p.nr_cpus-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) target_cpu = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) orig_mask = bind_to_cpu(target_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) /* Here we are running on the target CPU already */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) if (details >= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) bind_to_cpumask(orig_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) if (details >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) timersub(&stop, &start, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (details >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) process_nr, thread_nr, runtime_ns_max / bytes_done, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) fflush(stdout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) if (!last_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) timersub(&stop, &start0, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) runtime_ns_max = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) runtime_ns_max += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) show_summary(runtime_ns_max, l, &convergence);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) gettimeofday(&stop, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) timersub(&stop, &start0, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) td->runtime_ns = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) td->runtime_ns += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) secs = td->runtime_ns / NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) td->speed_gbs = secs ? bytes_done / secs / 1e9 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) getrusage(RUSAGE_THREAD, &rusage);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) td->system_time_ns = rusage.ru_stime.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) td->system_time_ns += rusage.ru_stime.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) td->user_time_ns = rusage.ru_utime.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) td->user_time_ns += rusage.ru_utime.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) free_data(thread_data, g->p.bytes_thread);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) pthread_mutex_lock(&g->stop_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) g->bytes_done += bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) pthread_mutex_unlock(&g->stop_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) * A worker process starts a couple of threads:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) static void worker_process(int process_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) pthread_mutex_t process_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) pthread_t *pthreads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) u8 *process_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) int task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) pthread_mutex_init(&process_lock, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) set_taskname("process %d", process_nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) * Pick up the memory policy and the CPU binding of our first thread,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) * so that we initialize memory accordingly:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) task_nr = process_nr*g->p.nr_threads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) td = g->threads + task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) bind_to_memnode(td->bind_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) bind_to_cpumask(td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) process_data = setup_private_data(g->p.bytes_process);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) if (g->p.show_details >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) printf(" # process %2d global mem: %p, process mem: %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) process_nr, g->data, process_data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) task_nr = process_nr*g->p.nr_threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) td = g->threads + task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) td->process_data = process_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) td->process_nr = process_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) td->thread_nr = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) td->task_nr = task_nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) td->val = rand();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) td->curr_cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) td->process_lock = &process_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) ret = pthread_create(pthreads + t, NULL, worker_thread, td);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) ret = pthread_join(pthreads[t], NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) BUG_ON(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) free_data(process_data, g->p.bytes_process);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) free(pthreads);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) static void print_summary(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) if (g->p.show_details < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) printf("\n ###\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", nr_numa_nodes(), g->p.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) printf(" # %5dx %5ldMB global shared mem operations\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) g->p.nr_loops, g->p.bytes_global/1024/1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) printf(" # %5dx %5ldMB process shared mem operations\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) g->p.nr_loops, g->p.bytes_process/1024/1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) printf(" # %5dx %5ldMB thread local mem operations\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) g->p.nr_loops, g->p.bytes_thread/1024/1024);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) printf(" ###\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) printf("\n ###\n"); fflush(stdout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) static void init_thread_data(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) int t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) g->threads = zalloc_shared_data(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) for (t = 0; t < g->p.nr_tasks; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) struct thread_data *td = g->threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) /* Allow all nodes by default: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) td->bind_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) /* Allow all CPUs by default: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) CPU_ZERO(&td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) CPU_SET(cpu, &td->bind_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) static void deinit_thread_data(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) free_data(g->threads, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) static int init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) /* Copy over options: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) g->p = p0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) g->p.nr_cpus = numa_num_configured_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) g->p.nr_nodes = numa_max_node() + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) /* char array in count_process_nodes(): */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) if (g->p.show_quiet && !g->p.show_details)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) g->p.show_details = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) /* Some memory should be specified: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) if (g->p.mb_global_str) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) g->p.mb_global = atof(g->p.mb_global_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) BUG_ON(g->p.mb_global < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) if (g->p.mb_proc_str) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) g->p.mb_proc = atof(g->p.mb_proc_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) BUG_ON(g->p.mb_proc < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) if (g->p.mb_proc_locked_str) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) BUG_ON(g->p.mb_proc_locked < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) if (g->p.mb_thread_str) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) g->p.mb_thread = atof(g->p.mb_thread_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) BUG_ON(g->p.mb_thread < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) BUG_ON(g->p.nr_threads <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) BUG_ON(g->p.nr_proc <= 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) g->p.bytes_global = g->p.mb_global *1024L*1024L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) g->p.bytes_process = g->p.mb_proc *1024L*1024L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) g->p.bytes_thread = g->p.mb_thread *1024L*1024L;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) g->data = setup_shared_data(g->p.bytes_global);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) /* Startup serialization: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) init_global_mutex(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) init_global_cond(&g->start_work_cond);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) init_global_mutex(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) init_global_cond(&g->startup_cond);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) init_global_mutex(&g->stop_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) init_thread_data();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) tprintf("#\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) if (parse_setup_cpu_list() || parse_setup_node_list())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) tprintf("#\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) print_summary();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) static void deinit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) free_data(g->data, g->p.bytes_global);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) g->data = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) deinit_thread_data();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) free_data(g, sizeof(*g));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) g = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) * Print a short or long result, depending on the verbosity setting:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) static void print_res(const char *name, double val,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) const char *txt_unit, const char *txt_short, const char *txt_long)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (!name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) name = "main,";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) if (!g->p.show_quiet)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) printf(" %14.3f %s\n", val, txt_long);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) static int __bench_numa(const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) struct timeval start, stop, diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) u64 runtime_ns_min, runtime_ns_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) pid_t *pids, pid, wpid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) double delta_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) double runtime_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) double runtime_sec_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) double runtime_sec_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) int wait_stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) double bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) int i, t, p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) if (init())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) pids = zalloc(g->p.nr_proc * sizeof(*pids));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) pid = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) if (g->p.serialize_startup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) tprintf(" #\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) tprintf(" # Startup synchronization: ..."); fflush(stdout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) gettimeofday(&start, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) for (i = 0; i < g->p.nr_proc; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) pid = fork();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) dprintf(" # process %2d: PID %d\n", i, pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) BUG_ON(pid < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) if (!pid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) /* Child process: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) worker_process(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) exit(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) pids[i] = pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) if (g->p.serialize_startup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) bool threads_ready = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) double startup_sec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) * Wait for all the threads to start up. The last thread will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) * signal this process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) pthread_mutex_lock(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) while (g->nr_tasks_started != g->p.nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) pthread_cond_wait(&g->startup_cond, &g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) pthread_mutex_unlock(&g->startup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) /* Wait for all threads to be at the start_work_cond. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) while (!threads_ready) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) pthread_mutex_lock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) threads_ready = (g->nr_tasks_working == g->p.nr_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) pthread_mutex_unlock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) if (!threads_ready)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) usleep(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) gettimeofday(&stop, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) timersub(&stop, &start, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) startup_sec = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) startup_sec += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) startup_sec /= NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) tprintf(" #\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) start = stop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) /* Start all threads running. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) pthread_mutex_lock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) g->start_work = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) pthread_mutex_unlock(&g->start_work_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) pthread_cond_broadcast(&g->start_work_cond);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) gettimeofday(&start, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) /* Parent process: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) for (i = 0; i < g->p.nr_proc; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) wpid = waitpid(pids[i], &wait_stat, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) BUG_ON(wpid < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) BUG_ON(!WIFEXITED(wait_stat));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) runtime_ns_sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) runtime_ns_min = -1LL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) for (t = 0; t < g->p.nr_tasks; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) u64 thread_runtime_ns = g->threads[t].runtime_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) runtime_ns_sum += thread_runtime_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) gettimeofday(&stop, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) timersub(&stop, &start, &diff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) tprintf("\n ###\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) tprintf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) runtime_sec_max = diff.tv_sec * NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) runtime_sec_max += diff.tv_usec * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) runtime_sec_max /= NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) runtime_sec_min = runtime_ns_min / NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) bytes = g->bytes_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / NSEC_PER_SEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) if (g->p.measure_convergence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) print_res(name, runtime_sec_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) print_res(name, runtime_sec_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) print_res(name, runtime_sec_min,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) print_res(name, runtime_avg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) "secs,", "runtime-avg/thread", "secs average thread-runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) print_res(name, delta_runtime / runtime_sec_max * 100.0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) "%,", "spread-runtime/thread", "% difference between max/avg runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) print_res(name, bytes / g->p.nr_tasks / 1e9,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) "GB,", "data/thread", "GB data processed, per thread");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) print_res(name, bytes / 1e9,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) "GB,", "data-total", "GB data processed, total");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) print_res(name, runtime_sec_max * NSEC_PER_SEC / (bytes / g->p.nr_tasks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) "GB/sec,", "thread-speed", "GB/sec/thread speed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) print_res(name, bytes / runtime_sec_max / 1e9,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) "GB/sec,", "total-speed", "GB/sec total speed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) if (g->p.show_details >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) char tname[14 + 2 * 10 + 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) struct thread_data *td;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) for (p = 0; p < g->p.nr_proc; p++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) for (t = 0; t < g->p.nr_threads; t++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) memset(tname, 0, sizeof(tname));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) td = g->threads + p*g->p.nr_threads + t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) snprintf(tname, sizeof(tname), "process%d:thread%d", p, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) print_res(tname, td->speed_gbs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) "GB/sec", "thread-speed", "GB/sec/thread speed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) print_res(tname, td->system_time_ns / NSEC_PER_SEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) "secs", "thread-system-time", "system CPU time/thread");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) print_res(tname, td->user_time_ns / NSEC_PER_SEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) "secs", "thread-user-time", "user CPU time/thread");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) free(pids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) deinit();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
/* Limit on the number of entries of one argument vector (see tests[]). */
#define MAX_ARGS 50

/*
 * Count the entries of the NULL-terminated argument vector @argv.
 * A vector with MAX_ARGS or more entries is treated as a bug.
 */
static int command_size(const char **argv)
{
	int nr_args;

	for (nr_args = 0; argv[nr_args]; nr_args++)
		;

	BUG_ON(nr_args >= MAX_ARGS);

	return nr_args;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) static void init_params(struct params *p, const char *name, int argc, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) printf("\n # Running %s \"perf bench numa", name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) for (i = 0; i < argc; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) printf(" %s", argv[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) printf("\"\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) memset(p, 0, sizeof(*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) /* Initialize nonzero defaults: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) p->serialize_startup = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) p->data_reads = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) p->data_writes = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) p->data_backwards = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) p->data_rand_walk = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) p->nr_loops = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) p->init_random = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) p->mb_global_str = "1";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) p->nr_proc = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) p->nr_threads = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) p->nr_secs = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) p->run_all = argc == 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) static int run_bench_numa(const char *name, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) int argc = command_size(argv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) init_params(&p0, name, argc, argv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) argc = parse_options(argc, argv, options, bench_numa_usage, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) if (argc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) if (__bench_numa(name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745)
/*
 * Option sets shared by the rows of the built-in test-suite table.
 *
 * Each base set carries "--thp", " 1"; the matching _NOTHP variant is the
 * base set followed by a second "--thp" with the value "-1".
 */
#define OPT_THP_ON		"--thp", " 1"
#define OPT_THP_OFF		"--thp", "-1"

#define OPT_BW_RAM		"-s",  "20", "-zZq",    OPT_THP_ON, "--no-data_rand_walk"
#define OPT_BW_RAM_NOTHP	OPT_BW_RAM, OPT_THP_OFF

#define OPT_CONV		"-s", "100", "-zZ0qcm", OPT_THP_ON
#define OPT_CONV_NOTHP		OPT_CONV, OPT_THP_OFF

#define OPT_BW			"-s",  "20", "-zZ0q",   OPT_THP_ON
#define OPT_BW_NOTHP		OPT_BW, OPT_THP_OFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) * The built-in test-suite executed by "perf bench numa -a".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) static const char *tests[][MAX_ARGS] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) /* Basic single-stream NUMA bandwidth measurements: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) "-C" , "0", "-M", "0", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) { "RAM-bw-local-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) "mem", "-p", "1", "-t", "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) "-C" , "0", "-M", "1", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) /* 2-stream NUMA bandwidth measurements: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) /* Cross-stream NUMA bandwidth measurement: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) /* Convergence latency measurements: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) { " 2x3-convergence,", "mem", "-p", "2", "-t", "3", "-P", "1020", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) { " 4x4-convergence-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) { " 8x4-convergence-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) /* Various NUMA process/thread layout bandwidth measurements: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) { " 8x1-bw-process-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) { " 1x4-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) { " 1x8-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) { "1x16-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) { " 2x3-bw-process,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) { " 4x4-bw-process,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) { " 4x6-bw-process,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) { " 4x8-bw-process,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) { " 4x8-bw-process-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) { " 3x3-bw-process,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) { " 5x5-bw-process,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) { "2x16-bw-process,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) { "1x32-bw-process,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) { "numa01-bw-thread-NOTHP,",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) static int bench_all(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) int nr = ARRAY_SIZE(tests);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) BUG_ON(ret < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) run_bench_numa(tests[i][0], tests[i] + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) printf("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) int bench_numa(int argc, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) init_params(&p0, "main,", argc, argv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) argc = parse_options(argc, argv, options, bench_numa_usage, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) if (argc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) if (p0.run_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) return bench_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) if (__bench_numa(NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) usage_with_options(numa_usage, options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) }