// SPDX-License-Identifier: GPL-2.0
/*
 * trace_hwlat.c - A simple Hardware Latency detector.
 *
 * Use this tracer to detect large system latencies induced by the behavior of
 * certain underlying system hardware or firmware, independent of Linux itself.
 * The code was developed originally to detect the presence of SMIs on Intel
 * and AMD systems, although there is no dependency upon x86 herein.
 *
 * The classical example usage of this tracer is in detecting the presence of
 * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a
 * somewhat special form of hardware interrupt spawned from earlier CPU debug
 * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge
 * LPC (or other device) to generate a special interrupt under certain
 * circumstances, for example, upon expiration of a special SMI timer device,
 * due to certain external thermal readings, on certain I/O address accesses,
 * and other situations. An SMI hits a special CPU pin, triggers a special
 * SMI mode (complete with special memory map), and the OS is unaware.
 *
 * Although certain hardware-induced latencies are necessary (for example,
 * a modern system often requires an SMI handler for correct thermal control
 * and remote management) they can wreak havoc upon any OS-level performance
 * guarantees of low latency, especially when the OS is not even made
 * aware of the presence of these interrupts. For this reason, we need a
 * somewhat brute force mechanism to detect these interrupts. In this case,
 * we do it by hogging all of the CPU(s) for configurable timer intervals,
 * sampling the built-in CPU timer, looking for discontiguous readings.
 *
 * WARNING: This implementation necessarily introduces latencies. Therefore,
 *          you should NEVER use this tracer while running in a production
 *          environment requiring any kind of low-latency performance
 *          guarantee(s).
 *
 * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com>
 * Copyright (C) 2013-2016 Steven Rostedt, Red Hat, Inc. <srostedt@redhat.com>
 *
 * Includes useful feedback from Clark Williams <clark@redhat.com>
 *
 */
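
/*
 * Typical usage, sketched here via the tracefs interface (the mount point
 * /sys/kernel/tracing is assumed; older setups may use
 * /sys/kernel/debug/tracing instead):
 *
 *	# echo hwlat > /sys/kernel/tracing/current_tracer
 *	# echo 100000 > /sys/kernel/tracing/hwlat_detector/width
 *	# echo 1000000 > /sys/kernel/tracing/hwlat_detector/window
 *	# cat /sys/kernel/tracing/trace
 *
 * Gaps longer than tracing_thresh (in usecs) show up as hwlat events in
 * the trace output.
 */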
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
#include <linux/cpumask.h>
#include <linux/delay.h>
#include <linux/sched/clock.h>
#include "trace.h"

static struct trace_array	*hwlat_trace;

#define U64STR_SIZE		22			/* 20 digits for U64_MAX + '\n' + '\0' */

#define BANNER			"hwlat_detector: "
#define DEFAULT_SAMPLE_WINDOW	1000000			/* 1s */
#define DEFAULT_SAMPLE_WIDTH	500000			/* 0.5s */
#define DEFAULT_LAT_THRESHOLD	10			/* 10us */

/* sampling thread */
static struct task_struct *hwlat_kthread;

static struct dentry *hwlat_sample_width;	/* sample width us */
static struct dentry *hwlat_sample_window;	/* sample window us */

/* Save the previous tracing_thresh value */
static unsigned long save_tracing_thresh;

/* NMI timestamp counters */
static u64 nmi_ts_start;
static u64 nmi_total_ts;
static int nmi_count;
static int nmi_cpu;

/* Tells NMIs to call back to the hwlat tracer to record timestamps */
bool trace_hwlat_callback_enabled;

/* If the user changed threshold, remember it */
static u64 last_tracing_thresh = DEFAULT_LAT_THRESHOLD * NSEC_PER_USEC;

/* Individual latency samples are stored here when detected. */
struct hwlat_sample {
	u64			seqnum;		/* unique sequence */
	u64			duration;	/* delta */
	u64			outer_duration;	/* delta (outer loop) */
	u64			nmi_total_ts;	/* Total time spent in NMIs */
	struct timespec64	timestamp;	/* wall time */
	int			nmi_count;	/* # NMIs during this sample */
	int			count;		/* # of iterations over threshold */
};
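
/*
 * How each sampling window is spent (all values in usecs), as implemented
 * by kthread_fn() below:
 *
 *  |<----------------- sample_window ----------------->|
 *  |<--- sample_width --->|<-------- remainder ------->|
 *    irqs off, sampling      msleep(window - width)
 */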

/* keep the global state somewhere. */
static struct hwlat_data {

	struct mutex lock;		/* protect changes */

	u64	count;			/* total since reset */

	u64	sample_window;		/* total sampling window (on+off) */
	u64	sample_width;		/* active sampling portion of window */

} hwlat_data = {
	.sample_window		= DEFAULT_SAMPLE_WINDOW,
	.sample_width		= DEFAULT_SAMPLE_WIDTH,
};

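/*
 * trace_hwlat_sample - Record a detected latency sample in the ring buffer
 * @sample: the sample to record
 */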
static void trace_hwlat_sample(struct hwlat_sample *sample)
{
	struct trace_array *tr = hwlat_trace;
	struct trace_event_call *call = &event_hwlat;
	struct trace_buffer *buffer = tr->array_buffer.buffer;
	struct ring_buffer_event *event;
	struct hwlat_entry *entry;
	unsigned long flags;
	int pc;

	pc = preempt_count();
	local_save_flags(flags);

	event = trace_buffer_lock_reserve(buffer, TRACE_HWLAT, sizeof(*entry),
					  flags, pc);
	if (!event)
		return;
	entry	= ring_buffer_event_data(event);
	entry->seqnum			= sample->seqnum;
	entry->duration			= sample->duration;
	entry->outer_duration		= sample->outer_duration;
	entry->timestamp		= sample->timestamp;
	entry->nmi_total_ts		= sample->nmi_total_ts;
	entry->nmi_count		= sample->nmi_count;
	entry->count			= sample->count;

	if (!call_filter_check_discard(call, entry, buffer, event))
		trace_buffer_unlock_commit_nostack(buffer, event);
}

/* Macros to encapsulate the time capturing infrastructure */
#define time_type	u64
#define time_get()	trace_clock_local()
#define time_to_us(x)	div_u64(x, 1000)
#define time_sub(a, b)	((a) - (b))
#define init_time(a, b)	(a = b)
#define time_u64(a)	a

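/*
 * trace_hwlat_callback - NMI entry/exit callback, invoked from the NMI path
 * @enter: true on NMI entry, false on NMI exit
 *
 * Accumulates the time spent in NMIs on the sampling CPU so that
 * get_sample() can report it alongside any detected latency.
 */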
void trace_hwlat_callback(bool enter)
{
	if (smp_processor_id() != nmi_cpu)
		return;

	/*
	 * Currently trace_clock_local() calls sched_clock() and the
	 * generic version is not NMI safe.
	 */
	if (!IS_ENABLED(CONFIG_GENERIC_SCHED_CLOCK)) {
		if (enter)
			nmi_ts_start = time_get();
		else
			nmi_total_ts += time_get() - nmi_ts_start;
	}

	if (enter)
		nmi_count++;
}

/**
 * get_sample - sample the CPU TSC and look for likely hardware latencies
 *
 * Used to repeatedly capture the CPU TSC (or similar), looking for potential
 * hardware-induced latency. Called with interrupts disabled and with
 * hwlat_data.lock held.
 */
static int get_sample(void)
{
	struct trace_array *tr = hwlat_trace;
	struct hwlat_sample s;
	time_type start, t1, t2, last_t2;
	s64 diff, outer_diff, total, last_total = 0;
	u64 sample = 0;
	u64 thresh = tracing_thresh;
	u64 outer_sample = 0;
	int ret = -1;
	unsigned int count = 0;

	do_div(thresh, NSEC_PER_USEC);	/* modifies thresh value */

	nmi_cpu = smp_processor_id();
	nmi_total_ts = 0;
	nmi_count = 0;
	/* Make sure NMIs see this first */
	barrier();

	trace_hwlat_callback_enabled = true;

	init_time(last_t2, 0);
	start = time_get(); /* start timestamp */
	outer_diff = 0;

	do {

		t1 = time_get();	/* we'll look for a discontinuity */
		t2 = time_get();

		if (time_u64(last_t2)) {
			/* Check the delta from outer loop (t2 to next t1) */
			outer_diff = time_to_us(time_sub(t1, last_t2));
			/* This shouldn't happen */
			if (outer_diff < 0) {
				pr_err(BANNER "time running backwards\n");
				goto out;
			}
			if (outer_diff > outer_sample)
				outer_sample = outer_diff;
		}
		last_t2 = t2;

		total = time_to_us(time_sub(t2, start)); /* sample width */

		/* Check for possible overflows */
		if (total < last_total) {
			pr_err(BANNER "time total overflowed\n");
			break;
		}
		last_total = total;

		/* This checks the inner loop (t1 to t2) */
		diff = time_to_us(time_sub(t2, t1)); /* current diff */

		if (diff > thresh || outer_diff > thresh) {
			if (!count)
				ktime_get_real_ts64(&s.timestamp);
			count++;
		}

		/* This shouldn't happen */
		if (diff < 0) {
			pr_err(BANNER "time running backwards\n");
			goto out;
		}

		if (diff > sample)
			sample = diff; /* only want highest value */

	} while (total <= hwlat_data.sample_width);

	barrier(); /* finish the above in the view for NMIs */
	trace_hwlat_callback_enabled = false;
	barrier(); /* Make sure nmi_total_ts is no longer updated */

	ret = 0;

	/* If we exceed the threshold value, we have found a hardware latency */
	if (sample > thresh || outer_sample > thresh) {
		u64 latency;

		ret = 1;

		/* We read in microseconds */
		if (nmi_total_ts)
			do_div(nmi_total_ts, NSEC_PER_USEC);

		hwlat_data.count++;
		s.seqnum = hwlat_data.count;
		s.duration = sample;
		s.outer_duration = outer_sample;
		s.nmi_total_ts = nmi_total_ts;
		s.nmi_count = nmi_count;
		s.count = count;
		trace_hwlat_sample(&s);

		latency = max(sample, outer_sample);

		/* Keep a running maximum ever recorded hardware latency */
		if (latency > tr->max_latency) {
			tr->max_latency = latency;
			latency_fsnotify(tr);
		}
	}

out:
	return ret;
}

static struct cpumask save_cpumask;
static bool disable_migrate;

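/*
 * move_to_next_cpu - rotate the sampling kthread to the next CPU allowed
 * by tracing_cpumask, wrapping back to the first one, so that successive
 * windows are sampled on different CPUs.
 */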
static void move_to_next_cpu(void)
{
	struct cpumask *current_mask = &save_cpumask;
	struct trace_array *tr = hwlat_trace;
	int next_cpu;

	if (disable_migrate)
		return;
	/*
	 * If for some reason the user modifies the CPU affinity
	 * of this thread, then stop migrating for the duration
	 * of the current test.
	 */
	if (!cpumask_equal(current_mask, current->cpus_ptr))
		goto disable;

	get_online_cpus();
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
	next_cpu = cpumask_next(smp_processor_id(), current_mask);
	put_online_cpus();

	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(current_mask);

	if (next_cpu >= nr_cpu_ids) /* Shouldn't happen! */
		goto disable;

	cpumask_clear(current_mask);
	cpumask_set_cpu(next_cpu, current_mask);

	sched_setaffinity(0, current_mask);
	return;

 disable:
	disable_migrate = true;
}

/*
 * kthread_fn - The CPU time sampling/hardware latency detection kernel thread
 *
 * Used to periodically sample the CPU TSC via a call to get_sample. We
 * disable interrupts, which does (intentionally) introduce latency since we
 * need to ensure nothing else might be running (and thus preempting).
 * Obviously this should never be used in production environments.
 *
 * Executes one loop iteration on each CPU in the tracing_cpumask sysfs file.
 */
static int kthread_fn(void *data)
{
	u64 interval;

	while (!kthread_should_stop()) {

		move_to_next_cpu();

		local_irq_disable();
		get_sample();
		local_irq_enable();

		mutex_lock(&hwlat_data.lock);
		interval = hwlat_data.sample_window - hwlat_data.sample_width;
		mutex_unlock(&hwlat_data.lock);

		do_div(interval, USEC_PER_MSEC); /* modifies interval value */

		/* Always sleep for at least 1ms */
		if (interval < 1)
			interval = 1;

		if (msleep_interruptible(interval))
			break;
	}

	return 0;
}

/**
 * start_kthread - Kick off the hardware latency sampling/detector kthread
 *
 * This starts the kernel thread that will sit and sample the CPU timestamp
 * counter (TSC or similar) and look for potential hardware latencies.
 */
static int start_kthread(struct trace_array *tr)
{
	struct cpumask *current_mask = &save_cpumask;
	struct task_struct *kthread;
	int next_cpu;

	if (hwlat_kthread)
		return 0;

	/* Just pick the first CPU on first iteration */
	get_online_cpus();
	cpumask_and(current_mask, cpu_online_mask, tr->tracing_cpumask);
	put_online_cpus();
	next_cpu = cpumask_first(current_mask);

	kthread = kthread_create(kthread_fn, NULL, "hwlatd");
	if (IS_ERR(kthread)) {
		pr_err(BANNER "could not start sampling thread\n");
		return -ENOMEM;
	}

	cpumask_clear(current_mask);
	cpumask_set_cpu(next_cpu, current_mask);
	sched_setaffinity(kthread->pid, current_mask);

	hwlat_kthread = kthread;
	wake_up_process(kthread);

	return 0;
}

/**
 * stop_kthread - Inform the hardware latency sampling/detector kthread to stop
 *
 * This kicks the running hardware latency sampling/detector kernel thread and
 * tells it to stop sampling now. Use this on unload and at system shutdown.
 */
static void stop_kthread(void)
{
	if (!hwlat_kthread)
		return;
	kthread_stop(hwlat_kthread);
	hwlat_kthread = NULL;
}

/*
 * hwlat_read - Wrapper read function for reading both window and width
 * @filp: The active open file structure
 * @ubuf: The userspace provided buffer to read value into
 * @cnt: The maximum number of bytes to read
 * @ppos: The current "file" position
 *
 * This function provides a generic read implementation for the global state
 * "hwlat_data" structure filesystem entries.
 */
static ssize_t hwlat_read(struct file *filp, char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	char buf[U64STR_SIZE];
	u64 *entry = filp->private_data;
	u64 val;
	int len;

	if (!entry)
		return -EFAULT;

	if (cnt > sizeof(buf))
		cnt = sizeof(buf);

	val = *entry;

	len = snprintf(buf, sizeof(buf), "%llu\n", val);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
}

/**
 * hwlat_width_write - Write function for "width" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in @file
 *
 * This function provides a write implementation for the "width" interface
 * to the hardware latency detector. It can be used to configure how many
 * microseconds of the total window we will actively sample for any
 * hardware-induced latency periods. Obviously, it is not possible to
 * sample constantly and have the system respond to a sample reader, or,
 * worse, without having the system appear to have gone out to lunch. It
 * is enforced that width is less than the total window size.
 */
static ssize_t
hwlat_width_write(struct file *filp, const char __user *ubuf,
		  size_t cnt, loff_t *ppos)
{
	u64 val;
	int err;

	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
	if (err)
		return err;

	mutex_lock(&hwlat_data.lock);
	if (val < hwlat_data.sample_window)
		hwlat_data.sample_width = val;
	else
		err = -EINVAL;
	mutex_unlock(&hwlat_data.lock);

	if (err)
		return err;

	return cnt;
}

/**
 * hwlat_window_write - Write function for "window" entry
 * @filp: The active open file structure
 * @ubuf: The user buffer that contains the value to write
 * @cnt: The maximum number of bytes to write to "file"
 * @ppos: The current position in @file
 *
 * This function provides a write implementation for the "window" interface
 * to the hardware latency detector. The window is the total time
 * in us that will be considered one sample period. Conceptually, windows
 * occur back-to-back and contain a sample width period during which
 * actual sampling occurs. Can be used to write a new total window size. It
 * is enforced that any value written must be greater than the sample width
 * size, or an error results.
 */
static ssize_t
hwlat_window_write(struct file *filp, const char __user *ubuf,
		   size_t cnt, loff_t *ppos)
{
	u64 val;
	int err;

	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
	if (err)
		return err;

	mutex_lock(&hwlat_data.lock);
	if (hwlat_data.sample_width < val)
		hwlat_data.sample_window = val;
	else
		err = -EINVAL;
	mutex_unlock(&hwlat_data.lock);

	if (err)
		return err;

	return cnt;
}

static const struct file_operations width_fops = {
	.open		= tracing_open_generic,
	.read		= hwlat_read,
	.write		= hwlat_width_write,
};

static const struct file_operations window_fops = {
	.open		= tracing_open_generic,
	.read		= hwlat_read,
	.write		= hwlat_window_write,
};

/**
 * init_tracefs - A function to initialize the tracefs interface files
 *
 * This function creates entries in tracefs for "hwlat_detector".
 * It creates the hwlat_detector directory in the tracing directory,
 * and within that directory are the width and window files used to
 * change and view those values.
 */
static int init_tracefs(void)
{
	int ret;
	struct dentry *top_dir;

	ret = tracing_init_dentry();
	if (ret)
		return -ENOMEM;

	top_dir = tracefs_create_dir("hwlat_detector", NULL);
	if (!top_dir)
		return -ENOMEM;

	hwlat_sample_window = tracefs_create_file("window", 0640,
						  top_dir,
						  &hwlat_data.sample_window,
						  &window_fops);
	if (!hwlat_sample_window)
		goto err;

	hwlat_sample_width = tracefs_create_file("width", 0640,
						 top_dir,
						 &hwlat_data.sample_width,
						 &width_fops);
	if (!hwlat_sample_width)
		goto err;

	return 0;

 err:
	tracefs_remove(top_dir);
	return -ENOMEM;
}

static void hwlat_tracer_start(struct trace_array *tr)
{
	int err;

	err = start_kthread(tr);
	if (err)
		pr_err(BANNER "Cannot start hwlat kthread\n");
}

static void hwlat_tracer_stop(struct trace_array *tr)
{
	stop_kthread();
}

static bool hwlat_busy;

static int hwlat_tracer_init(struct trace_array *tr)
{
	/* Only allow one instance to enable this */
	if (hwlat_busy)
		return -EBUSY;

	hwlat_trace = tr;

	disable_migrate = false;
	hwlat_data.count = 0;
	tr->max_latency = 0;
	save_tracing_thresh = tracing_thresh;

	/* tracing_thresh is in nsecs, we speak in usecs */
	if (!tracing_thresh)
		tracing_thresh = last_tracing_thresh;

	if (tracer_tracing_is_on(tr))
		hwlat_tracer_start(tr);

	hwlat_busy = true;

	return 0;
}

static void hwlat_tracer_reset(struct trace_array *tr)
{
	stop_kthread();

	/* the tracing threshold is static between runs */
	last_tracing_thresh = tracing_thresh;

	tracing_thresh = save_tracing_thresh;
	hwlat_busy = false;
}

static struct tracer hwlat_tracer __read_mostly =
{
	.name		= "hwlat",
	.init		= hwlat_tracer_init,
	.reset		= hwlat_tracer_reset,
	.start		= hwlat_tracer_start,
	.stop		= hwlat_tracer_stop,
	.allow_instances = true,
};

__init static int init_hwlat_tracer(void)
{
	int ret;

	mutex_init(&hwlat_data.lock);

	ret = register_tracer(&hwlat_tracer);
	if (ret)
		return ret;

	init_tracefs();

	return 0;
}
late_initcall(init_hwlat_tracer);