#ifndef _TOOLS_LINUX_RING_BUFFER_H_
#define _TOOLS_LINUX_RING_BUFFER_H_

#include <asm/barrier.h>
#include <linux/perf_event.h>

/*
 * Contract with kernel for walking the perf ring buffer from
 * user space requires the following barrier pairing (quote
 * from kernel/events/ring_buffer.c):
 *
 *   Since the mmap() consumer (userspace) can run on a
 *   different CPU:
 *
 *   kernel                          user
 *
 *   if (LOAD ->data_tail) {         LOAD ->data_head
 *                      (A)          smp_rmb()       (C)
 *      STORE $data                  LOAD $data
 *      smp_wmb()       (B)          smp_mb()        (D)
 *      STORE ->data_head            STORE ->data_tail
 *   }
 *
 *   Where A pairs with D, and B pairs with C.
 *
 *   In our case A is a control dependency that separates the
 *   load of the ->data_tail and the stores of $data. In case
 *   ->data_tail indicates there is no room in the buffer to
 *   store $data we do not.
 *
 *   D needs to be a full barrier since it separates the data
 *   READ from the tail WRITE.
 *
 *   For B a WMB is sufficient since it separates two WRITEs,
 *   and for C an RMB is sufficient since it separates two READs.
 *
 * Note that instead of B, C and D we could also use smp_store_release()
 * in B and D as well as smp_load_acquire() in C.
 *
 * However, this optimization does not make sense for all kernel-supported
 * architectures, since for a fair number of them smp_load_acquire() would
 * resolve into a READ_ONCE() + smp_mb() pair and smp_store_release() into
 * an smp_mb() + WRITE_ONCE() pair.
 *
 * Thus, for those architectures, smp_wmb() in B and smp_rmb() in C would
 * still be less expensive. For D the release has either the same or a
 * lower cost; for example, thanks to TSO, x86 can avoid the CPU barrier
 * entirely.
 */

static inline u64 ring_buffer_read_head(struct perf_event_mmap_page *base)
{
	/*
	 * User-space side of (C) above: load ->data_head and order it
	 * against the data loads that follow.
	 *
	 * Architectures where smp_load_acquire() does not fall back to
	 * a READ_ONCE() + smp_mb() pair (__arch64__ is the 64-bit SPARC
	 * compiler define).
	 */
#if defined(__x86_64__) || defined(__aarch64__) || defined(__powerpc64__) || \
    defined(__ia64__) || defined(__sparc__) && defined(__arch64__)
	return smp_load_acquire(&base->data_head);
#else
	u64 head = READ_ONCE(base->data_head);

	smp_rmb();
	return head;
#endif
}

static inline void ring_buffer_write_tail(struct perf_event_mmap_page *base,
                                          u64 tail)
{
	/*
	 * User-space side of (D) above: the release makes sure the data
	 * loads complete before the kernel observes the new ->data_tail
	 * (pairs with the kernel's load of ->data_tail in A).
	 */
	smp_store_release(&base->data_tail, tail);
}
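
/*
 * Illustrative sketch (not part of the upstream header): one way a
 * user-space consumer could drain the ring buffer with the helpers
 * above.  The assumed layout (metadata page at @base, data area at
 * @data with power-of-two size @data_size, records that do not wrap
 * around the end of the buffer) and the @handle callback exist only
 * for this example; real consumers such as perf also copy out
 * records that wrap.
 */
static inline void ring_buffer_consume_sketch(struct perf_event_mmap_page *base,
		void *data, u64 data_size,
		void (*handle)(struct perf_event_header *hdr))
{
	/* LOAD ->data_head + (C): acquire all records published so far. */
	u64 head = ring_buffer_read_head(base);
	/* Only this thread writes ->data_tail, so a plain load is fine. */
	u64 tail = base->data_tail;

	while (tail != head) {
		struct perf_event_header *hdr;

		/* LOAD $data: the record header lives at tail modulo size. */
		hdr = (struct perf_event_header *)
			((unsigned char *)data + (tail & (data_size - 1)));
		handle(hdr);
		tail += hdr->size;
	}

	/* smp_mb() (D) + STORE ->data_tail: hand the space back to the kernel. */
	ring_buffer_write_tail(base, tail);
}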

#endif /* _TOOLS_LINUX_RING_BUFFER_H_ */