^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * This program is particularly useful to measure the latency of nthread wakeups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * in non-error situations: all waiters are queued and all wake calls wakeup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * one or more tasks, and thus the waitqueue is never empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
/* For the CPU_*() macros (CPU_ZERO, CPU_SET) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <pthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "../util/stat.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <subcmd/parse-options.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/compiler.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/time64.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <internal/cpumap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include <perf/cpumap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include "bench.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #include "futex.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <err.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <stdlib.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <sys/time.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) /* all threads will block on the same futex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) static u_int32_t futex1 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * How many wakeups to do at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * Default to 1 in order to make the kernel work more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) static unsigned int nwakes = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) pthread_t *worker;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) static bool done = false, silent = false, fshared = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) static pthread_mutex_t thread_lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) static pthread_cond_t thread_parent, thread_worker;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) static struct stats waketime_stats, wakeup_stats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) static unsigned int threads_starting, nthreads = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) static int futex_flag = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) static const struct option options[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) OPT_END()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
/* NULL-terminated usage text handed to parse_options()/usage_with_options(). */
static const char * const bench_futex_wake_usage[] = {
	"perf bench futex wake <options>",
	NULL,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) static void *workerfn(void *arg __maybe_unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) pthread_mutex_lock(&thread_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) threads_starting--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) if (!threads_starting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) pthread_cond_signal(&thread_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) pthread_cond_wait(&thread_worker, &thread_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) pthread_mutex_unlock(&thread_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) pthread_exit(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) static void print_summary(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) double waketime_avg = avg_stats(&waketime_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) double waketime_stddev = stddev_stats(&waketime_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) unsigned int wakeup_avg = avg_stats(&wakeup_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) wakeup_avg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) nthreads,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) waketime_avg / USEC_PER_MSEC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) rel_stddev_stats(waketime_stddev, waketime_avg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) static void block_threads(pthread_t *w,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) pthread_attr_t thread_attr, struct perf_cpu_map *cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) cpu_set_t cpuset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) unsigned int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) threads_starting = nthreads;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) /* create and block all threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) for (i = 0; i < nthreads; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) CPU_ZERO(&cpuset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) CPU_SET(cpu->map[i % cpu->nr], &cpuset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) err(EXIT_FAILURE, "pthread_create");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) static void toggle_done(int sig __maybe_unused,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) siginfo_t *info __maybe_unused,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) void *uc __maybe_unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) int bench_futex_wake(int argc, const char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) unsigned int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) struct sigaction act;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) pthread_attr_t thread_attr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) struct perf_cpu_map *cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) if (argc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) usage_with_options(bench_futex_wake_usage, options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) exit(EXIT_FAILURE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) cpu = perf_cpu_map__new(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) if (!cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) err(EXIT_FAILURE, "calloc");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) memset(&act, 0, sizeof(act));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) sigfillset(&act.sa_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) act.sa_sigaction = toggle_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) sigaction(SIGINT, &act, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) if (!nthreads)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) nthreads = cpu->nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) worker = calloc(nthreads, sizeof(*worker));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) if (!worker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) err(EXIT_FAILURE, "calloc");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) if (!fshared)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) futex_flag = FUTEX_PRIVATE_FLAG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) printf("Run summary [PID %d]: blocking on %d threads (at [%s] futex %p), "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) "waking up %d at a time.\n\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) getpid(), nthreads, fshared ? "shared":"private", &futex1, nwakes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) init_stats(&wakeup_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) init_stats(&waketime_stats);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) pthread_attr_init(&thread_attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) pthread_mutex_init(&thread_lock, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) pthread_cond_init(&thread_parent, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) pthread_cond_init(&thread_worker, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) for (j = 0; j < bench_repeat && !done; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) unsigned int nwoken = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) struct timeval start, end, runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) /* create, launch & block all threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) block_threads(worker, thread_attr, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) /* make sure all threads are already blocked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) pthread_mutex_lock(&thread_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) while (threads_starting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) pthread_cond_wait(&thread_parent, &thread_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) pthread_cond_broadcast(&thread_worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) pthread_mutex_unlock(&thread_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) usleep(100000);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) /* Ok, all threads are patiently blocked, start waking folks up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) gettimeofday(&start, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) while (nwoken != nthreads)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) nwoken += futex_wake(&futex1, nwakes, futex_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) gettimeofday(&end, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) timersub(&end, &start, &runtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) update_stats(&wakeup_stats, nwoken);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) update_stats(&waketime_stats, runtime.tv_usec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) if (!silent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) j + 1, nwoken, nthreads, runtime.tv_usec / (double)USEC_PER_MSEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) for (i = 0; i < nthreads; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) ret = pthread_join(worker[i], NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) err(EXIT_FAILURE, "pthread_join");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) /* cleanup & report results */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) pthread_cond_destroy(&thread_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) pthread_cond_destroy(&thread_worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) pthread_mutex_destroy(&thread_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) pthread_attr_destroy(&thread_attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) print_summary();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) free(worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) perf_cpu_map__put(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) }