// SPDX-License-Identifier: GPL-2.0
#include <linux/delay.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/trace_clock.h>

#define CREATE_TRACE_POINTS
#include "trace_benchmark.h"

static struct task_struct *bm_event_thread;

static char bm_str[BENCHMARK_EVENT_STRLEN] = "START";

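/*
 * Running statistics updated by trace_do_benchmark(). bm_total and
 * bm_totalsq hold the running sums of delta and delta^2 used for the
 * average and variance; bm_avg, bm_std and bm_stddev cache the last
 * computed results so they can still be reported once the counts are
 * frozen at UINT_MAX samples.
 */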
static u64 bm_total;
static u64 bm_totalsq;
static u64 bm_last;
static u64 bm_max;
static u64 bm_min;
static u64 bm_first;
static u64 bm_cnt;
static u64 bm_stddev;
static unsigned int bm_avg;
static unsigned int bm_std;

static bool ok_to_run;

/*
 * This gets called in a loop recording the time it took to write
 * the tracepoint. What it writes is the time statistics of the last
 * tracepoint write. As there is nothing to write the first time
 * it simply writes "START". As the first write is cold cache and
 * the rest is hot, we save off that time in bm_first and it is
 * reported as "first", which is shown in the second write to the
 * tracepoint. The "first" field is written within the statistics from
 * then on but never changes.
 */
static void trace_do_benchmark(void)
{
	u64 start;
	u64 stop;
	u64 delta;
	u64 stddev;
	u64 seed;
	u64 last_seed;
	unsigned int avg;
	unsigned int std = 0;

	/* Only run if the tracepoint is actually active */
	if (!trace_benchmark_event_enabled() || !tracing_is_on())
		return;

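	/*
	 * Keep interrupts off across the two clock reads so a stray
	 * IRQ does not get charged to the tracepoint write being
	 * measured.
	 */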
	local_irq_disable();
	start = trace_clock_local();
	trace_benchmark_event(bm_str);
	stop = trace_clock_local();
	local_irq_enable();

	bm_cnt++;

	delta = stop - start;

	/*
	 * The first read is cold cached, keep it separate from the
	 * other calculations.
	 */
	if (bm_cnt == 1) {
		bm_first = delta;
		scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
			  "first=%llu [COLD CACHED]", bm_first);
		return;
	}

	bm_last = delta;

	if (delta > bm_max)
		bm_max = delta;
	if (!bm_min || delta < bm_min)
		bm_min = delta;

	/*
	 * Once bm_cnt exceeds UINT_MAX, the (u32) casts used with the
	 * do_div() calls below would truncate it and break the
	 * statistics accounting. Freeze the statistics when that
	 * happens. We should have enough data for the avg and stddev
	 * anyway.
	 */
	if (bm_cnt > UINT_MAX) {
		scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
			  "last=%llu first=%llu max=%llu min=%llu ** avg=%u std=%d std^2=%lld",
			  bm_last, bm_first, bm_max, bm_min, bm_avg, bm_std, bm_stddev);
		return;
	}

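	/* Accumulate sum(delta) and sum(delta^2) for the math below */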
	bm_total += delta;
	bm_totalsq += delta * delta;

	if (bm_cnt > 1) {
		/*
		 * Calculate the sample variance from the running sums.
		 * This is the expanded sum-of-squares form (not,
		 * strictly speaking, Welford's online method):
		 * s^2 = 1 / (n * (n-1)) * (n * \Sum (x_i)^2 - (\Sum x_i)^2)
		 */
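		/* do_div(n, base) divides the u64 @n in place by a 32-bit base */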
		stddev = (u64)bm_cnt * bm_totalsq - bm_total * bm_total;
		do_div(stddev, (u32)bm_cnt);
		do_div(stddev, (u32)bm_cnt - 1);
	} else
		stddev = 0;

	delta = bm_total;
	do_div(delta, bm_cnt);
	avg = delta;

	if (stddev > 0) {
		int i = 0;
		/*
		 * stddev is the square of standard deviation but
		 * we want the actual number. Use the average
		 * as our seed to find the std.
		 *
		 * The next try is:
		 *  x = (x + N/x) / 2
		 *
		 * Where N is the squared number to find the square
		 * root of.
		 */
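		/*
		 * This is the Newton-Raphson iteration for sqrt(N):
		 * e.g. with N = 90 and seed 9, (9 + 90/9) / 2 = 9, so
		 * it settles on 9 (sqrt(90) ~= 9.49). The loop below
		 * is capped at 10 rounds because the integer division
		 * can make the sequence oscillate between two adjacent
		 * values instead of converging.
		 */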
		seed = avg;
		do {
			last_seed = seed;
			seed = stddev;
			if (!last_seed)
				break;
			do_div(seed, last_seed);
			seed += last_seed;
			do_div(seed, 2);
		} while (i++ < 10 && last_seed != seed);

		std = seed;
	}

	scnprintf(bm_str, BENCHMARK_EVENT_STRLEN,
		  "last=%llu first=%llu max=%llu min=%llu avg=%u std=%d std^2=%lld",
		  bm_last, bm_first, bm_max, bm_min, avg, std, stddev);

	bm_std = std;
	bm_avg = avg;
	bm_stddev = stddev;
}

static int benchmark_event_kthread(void *arg)
{
	/* sleep a bit to make sure the tracepoint gets activated */
	msleep(100);

	while (!kthread_should_stop()) {

		trace_do_benchmark();

		/*
		 * We don't go to sleep, but let others run as well.
		 * This is basically a "yield()" to let any task that
		 * wants to run schedule in, but if the CPU is idle,
		 * we'll keep burning cycles.
		 *
		 * Note the tasks_rcu_qs() version of cond_resched() will
		 * notify synchronize_rcu_tasks() that this thread has
		 * passed a quiescent state for rcu_tasks. Otherwise
		 * this thread will never voluntarily schedule, which would
		 * block synchronize_rcu_tasks() indefinitely.
		 */
		cond_resched_tasks_rcu_qs();
	}

	return 0;
}

/*
 * When the benchmark tracepoint is enabled, it calls this
 * function and the thread that calls the tracepoint is created.
 */
int trace_benchmark_reg(void)
{
	if (!ok_to_run) {
		pr_warn("trace benchmark cannot be started via kernel command line\n");
		return -EBUSY;
	}

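	/*
	 * kthread_run() creates and immediately wakes the thread; on
	 * failure it returns an ERR_PTR() value, never NULL.
	 */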
	bm_event_thread = kthread_run(benchmark_event_kthread,
				      NULL, "event_benchmark");
	if (IS_ERR(bm_event_thread)) {
		pr_warn("trace benchmark failed to create kernel thread\n");
		return PTR_ERR(bm_event_thread);
	}

	return 0;
}

/*
 * When the benchmark tracepoint is disabled, it calls this
 * function and the thread that calls the tracepoint is deleted
 * and all the numbers are reset.
 */
void trace_benchmark_unreg(void)
{
	if (!bm_event_thread)
		return;

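	/* kthread_stop() blocks until benchmark_event_kthread() returns */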
	kthread_stop(bm_event_thread);
	bm_event_thread = NULL;

	strcpy(bm_str, "START");
	bm_total = 0;
	bm_totalsq = 0;
	bm_last = 0;
	bm_max = 0;
	bm_min = 0;
	bm_cnt = 0;
	/* These don't need to be reset but reset them anyway */
	bm_first = 0;
	bm_std = 0;
	bm_avg = 0;
	bm_stddev = 0;
}

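/*
 * Enabling the benchmark event on the kernel command line would try to
 * start the thread before the system is able to run it; ok_to_run is
 * only set here, at early_initcall() time, which is why
 * trace_benchmark_reg() rejects earlier attempts with -EBUSY.
 */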
static __init int ok_to_run_trace_benchmark(void)
{
	ok_to_run = true;

	return 0;
}

early_initcall(ok_to_run_trace_benchmark);