// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2016 Facebook
 */
#include <linux/bpf.h>
#include <linux/jhash.h>
#include <linux/filter.h>
#include <linux/kernel.h>
#include <linux/stacktrace.h>
#include <linux/perf_event.h>
#include <linux/elf.h>
#include <linux/pagemap.h>
#include <linux/irq_work.h>
#include <linux/btf_ids.h>
#include "percpu_freelist.h"

#define STACK_CREATE_FLAG_MASK					\
	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY |	\
	 BPF_F_STACK_BUILD_ID)

struct stack_map_bucket {
	struct pcpu_freelist_node fnode;
	u32 hash;
	u32 nr;
	u64 data[];
};

struct bpf_stack_map {
	struct bpf_map map;
	void *elems;
	struct pcpu_freelist freelist;
	u32 n_buckets;
	struct stack_map_bucket *buckets[];
};

/* irq_work to run up_read() for build_id lookup in nmi context */
struct stack_map_irq_work {
	struct irq_work irq_work;
	struct mm_struct *mm;
};

static void do_up_read(struct irq_work *entry)
{
	struct stack_map_irq_work *work;

	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
		return;

	work = container_of(entry, struct stack_map_irq_work, irq_work);
	mmap_read_unlock_non_owner(work->mm);
}

static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
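/* Maps created with BPF_F_STACK_BUILD_ID store build_id+offset records
 * instead of raw instruction pointers.
 */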
static inline bool stack_map_use_build_id(struct bpf_map *map)
{
	return (map->map_flags & BPF_F_STACK_BUILD_ID);
}

static inline int stack_map_data_size(struct bpf_map *map)
{
	return stack_map_use_build_id(map) ?
		sizeof(struct bpf_stack_build_id) : sizeof(u64);
}

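/* Preallocate all buckets up front and put them on a per-cpu freelist,
 * so that stack ids can later be allocated from any context without
 * sleeping.
 */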
static int prealloc_elems_and_freelist(struct bpf_stack_map *smap)
{
	u64 elem_size = sizeof(struct stack_map_bucket) +
			(u64)smap->map.value_size;
	int err;

	smap->elems = bpf_map_area_alloc(elem_size * smap->map.max_entries,
					 smap->map.numa_node);
	if (!smap->elems)
		return -ENOMEM;

	err = pcpu_freelist_init(&smap->freelist);
	if (err)
		goto free_elems;

	pcpu_freelist_populate(&smap->freelist, smap->elems, elem_size,
			       smap->map.max_entries);
	return 0;

free_elems:
	bpf_map_area_free(smap->elems);
	return err;
}

/* Called from syscall */
static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
{
	u32 value_size = attr->value_size;
	struct bpf_stack_map *smap;
	struct bpf_map_memory mem;
	u64 cost, n_buckets;
	int err;

	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
		return ERR_PTR(-EINVAL);

	/* check sanity of attributes */
	if (attr->max_entries == 0 || attr->key_size != 4 ||
	    value_size < 8 || value_size % 8)
		return ERR_PTR(-EINVAL);

	BUILD_BUG_ON(sizeof(struct bpf_stack_build_id) % sizeof(u64));
	if (attr->map_flags & BPF_F_STACK_BUILD_ID) {
		if (value_size % sizeof(struct bpf_stack_build_id) ||
		    value_size / sizeof(struct bpf_stack_build_id)
		    > sysctl_perf_event_max_stack)
			return ERR_PTR(-EINVAL);
	} else if (value_size / 8 > sysctl_perf_event_max_stack)
		return ERR_PTR(-EINVAL);

	/* hash table size must be power of 2 */
	n_buckets = roundup_pow_of_two(attr->max_entries);
	if (!n_buckets)
		return ERR_PTR(-E2BIG);

	cost = n_buckets * sizeof(struct stack_map_bucket *) + sizeof(*smap);
	cost += n_buckets * (value_size + sizeof(struct stack_map_bucket));
	err = bpf_map_charge_init(&mem, cost);
	if (err)
		return ERR_PTR(err);

	smap = bpf_map_area_alloc(cost, bpf_map_attr_numa_node(attr));
	if (!smap) {
		bpf_map_charge_finish(&mem);
		return ERR_PTR(-ENOMEM);
	}

	bpf_map_init_from_attr(&smap->map, attr);
	smap->map.value_size = value_size;
	smap->n_buckets = n_buckets;

	err = get_callchain_buffers(sysctl_perf_event_max_stack);
	if (err)
		goto free_charge;

	err = prealloc_elems_and_freelist(smap);
	if (err)
		goto put_buffers;

	bpf_map_charge_move(&smap->map.memory, &mem);

	return &smap->map;

put_buffers:
	put_callchain_buffers();
free_charge:
	bpf_map_charge_finish(&mem);
	bpf_map_area_free(smap);
	return ERR_PTR(err);
}

#define BPF_BUILD_ID 3
/*
 * Parse build id from the note segment. This logic can be shared between
 * 32-bit and 64-bit systems, because Elf32_Nhdr and Elf64_Nhdr are
 * identical.
 */
static inline int stack_map_parse_build_id(void *page_addr,
					    unsigned char *build_id,
					    void *note_start,
					    Elf32_Word note_size)
{
	Elf32_Word note_offs = 0, new_offs;

	/* check for overflow */
	if (note_start < page_addr || note_start + note_size < note_start)
		return -EINVAL;

	/* only supports note that fits in the first page */
	if (note_start + note_size > page_addr + PAGE_SIZE)
		return -EINVAL;

	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);

		if (nhdr->n_type == BPF_BUILD_ID &&
		    nhdr->n_namesz == sizeof("GNU") &&
		    nhdr->n_descsz > 0 &&
		    nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
			memcpy(build_id,
			       note_start + note_offs +
			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
			       nhdr->n_descsz);
			memset(build_id + nhdr->n_descsz, 0,
			       BPF_BUILD_ID_SIZE - nhdr->n_descsz);
			return 0;
		}
		new_offs = note_offs + sizeof(Elf32_Nhdr) +
			ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
		if (new_offs <= note_offs)	/* overflow */
			break;
		note_offs = new_offs;
	}
	return -EINVAL;
}

/* Parse build ID from 32-bit ELF */
static int stack_map_get_build_id_32(void *page_addr,
				     unsigned char *build_id)
{
	Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
	Elf32_Phdr *phdr;
	int i;

	/* only supports phdr that fits in one page */
	if (ehdr->e_phnum >
	    (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
		return -EINVAL;

	phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));

	for (i = 0; i < ehdr->e_phnum; ++i) {
		if (phdr[i].p_type == PT_NOTE &&
		    !stack_map_parse_build_id(page_addr, build_id,
					      page_addr + phdr[i].p_offset,
					      phdr[i].p_filesz))
			return 0;
	}
	return -EINVAL;
}

/* Parse build ID from 64-bit ELF */
static int stack_map_get_build_id_64(void *page_addr,
				     unsigned char *build_id)
{
	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
	Elf64_Phdr *phdr;
	int i;

	/* only supports phdr that fits in one page */
	if (ehdr->e_phnum >
	    (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
		return -EINVAL;

	phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));

	for (i = 0; i < ehdr->e_phnum; ++i) {
		if (phdr[i].p_type == PT_NOTE &&
		    !stack_map_parse_build_id(page_addr, build_id,
					      page_addr + phdr[i].p_offset,
					      phdr[i].p_filesz))
			return 0;
	}
	return -EINVAL;
}

/* Parse build ID of ELF file mapped to vma */
static int stack_map_get_build_id(struct vm_area_struct *vma,
				  unsigned char *build_id)
{
	Elf32_Ehdr *ehdr;
	struct page *page;
	void *page_addr;
	int ret;

	/* only works for page backed storage */
	if (!vma->vm_file)
		return -EINVAL;

	page = find_get_page(vma->vm_file->f_mapping, 0);
	if (!page)
		return -EFAULT;	/* page not mapped */

	ret = -EINVAL;
	page_addr = kmap_atomic(page);
	ehdr = (Elf32_Ehdr *)page_addr;

	/* compare magic x7f "ELF" */
	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
		goto out;

	/* only support executable file and shared object file */
	if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
		goto out;

	if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
		ret = stack_map_get_build_id_32(page_addr, build_id);
	else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
		ret = stack_map_get_build_id_64(page_addr, build_id);
out:
	kunmap_atomic(page_addr);
	put_page(page);
	return ret;
}

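/* Convert an array of user-space instruction pointers into
 * (build_id, file offset) records. Entries whose vma or build_id cannot
 * be resolved fall back to reporting the raw ip.
 */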
static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
					  u64 *ips, u32 trace_nr, bool user)
{
	int i;
	struct vm_area_struct *vma;
	bool irq_work_busy = false;
	struct stack_map_irq_work *work = NULL;

	if (irqs_disabled()) {
		if (!IS_ENABLED(CONFIG_PREEMPT_RT)) {
			work = this_cpu_ptr(&up_read_work);
			if (atomic_read(&work->irq_work.flags) & IRQ_WORK_BUSY) {
				/* cannot queue more up_read, fallback */
				irq_work_busy = true;
			}
		} else {
			/*
			 * PREEMPT_RT does not allow trylocking the mmap sem
			 * in interrupt-disabled context. Force the fallback
			 * code.
			 */
			irq_work_busy = true;
		}
	}

	/*
	 * We cannot do up_read() while irqs are disabled, because of the
	 * risk of deadlocking on rq_lock. To do a build_id lookup while
	 * irqs are disabled, we need to run up_read() from irq_work. We
	 * use a per-cpu variable for the irq_work. If the irq_work is
	 * already in use by another lookup, we fall back to reporting ips.
	 *
	 * The same fallback is used for the kernel stack (!user) on a
	 * stackmap with build_id.
	 */
	if (!user || !current || !current->mm || irq_work_busy ||
	    !mmap_read_trylock_non_owner(current->mm)) {
		/* cannot access current->mm, fall back to ips */
		for (i = 0; i < trace_nr; i++) {
			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
			id_offs[i].ip = ips[i];
			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
		}
		return;
	}

	for (i = 0; i < trace_nr; i++) {
		vma = find_vma(current->mm, ips[i]);
		if (!vma || stack_map_get_build_id(vma, id_offs[i].build_id)) {
			/* per entry fall back to ips */
			id_offs[i].status = BPF_STACK_BUILD_ID_IP;
			id_offs[i].ip = ips[i];
			memset(id_offs[i].build_id, 0, BPF_BUILD_ID_SIZE);
			continue;
		}
		id_offs[i].offset = (vma->vm_pgoff << PAGE_SHIFT) + ips[i]
			- vma->vm_start;
		id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
	}

	if (!work) {
		mmap_read_unlock_non_owner(current->mm);
	} else {
		work->mm = current->mm;
		irq_work_queue(&work->irq_work);
	}
}

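/* Capture the kernel stack of a task via stack_trace_save_tsk() and
 * return it as a perf_callchain_entry. Returns NULL if CONFIG_STACKTRACE
 * is disabled or no callchain entry is available.
 */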
static struct perf_callchain_entry *
get_callchain_entry_for_task(struct task_struct *task, u32 max_depth)
{
#ifdef CONFIG_STACKTRACE
	struct perf_callchain_entry *entry;
	int rctx;

	entry = get_callchain_entry(&rctx);

	if (!entry)
		return NULL;

	entry->nr = stack_trace_save_tsk(task, (unsigned long *)entry->ip,
					 max_depth, 0);

	/* stack_trace_save_tsk() works on unsigned long array, while
	 * perf_callchain_entry uses u64 array. For 32-bit systems, it is
	 * necessary to fix this mismatch.
	 */
	if (__BITS_PER_LONG != 64) {
		unsigned long *from = (unsigned long *) entry->ip;
		u64 *to = entry->ip;
		int i;

		/* copy data from the end to avoid using extra buffer */
		for (i = entry->nr - 1; i >= 0; i--)
			to[i] = (u64)(from[i]);
	}

	put_callchain_entry(rctx);

	return entry;
#else /* CONFIG_STACKTRACE */
	return NULL;
#endif
}

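/* Hash the captured trace into a bucket and store either the raw ips or
 * build_id+offset records there. Returns the bucket id, or a negative
 * error if the bucket is already taken and BPF_F_REUSE_STACKID is not set.
 */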
static long __bpf_get_stackid(struct bpf_map *map,
			      struct perf_callchain_entry *trace, u64 flags)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *bucket, *new_bucket, *old_bucket;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	u32 hash, id, trace_nr, trace_len;
	bool user = flags & BPF_F_USER_STACK;
	u64 *ips;
	bool hash_matches;

	if (trace->nr <= skip)
		/* skipping more than usable stack trace */
		return -EFAULT;

	trace_nr = trace->nr - skip;
	trace_len = trace_nr * sizeof(u64);
	ips = trace->ip + skip;
	hash = jhash2((u32 *)ips, trace_len / sizeof(u32), 0);
	id = hash & (smap->n_buckets - 1);
	bucket = READ_ONCE(smap->buckets[id]);

	hash_matches = bucket && bucket->hash == hash;
	/* fast cmp */
	if (hash_matches && flags & BPF_F_FAST_STACK_CMP)
		return id;

	if (stack_map_use_build_id(map)) {
		/* for build_id+offset, pop a bucket before slow cmp */
		new_bucket = (struct stack_map_bucket *)
			pcpu_freelist_pop(&smap->freelist);
		if (unlikely(!new_bucket))
			return -ENOMEM;
		new_bucket->nr = trace_nr;
		stack_map_get_build_id_offset(
			(struct bpf_stack_build_id *)new_bucket->data,
			ips, trace_nr, user);
		trace_len = trace_nr * sizeof(struct bpf_stack_build_id);
		if (hash_matches && bucket->nr == trace_nr &&
		    memcmp(bucket->data, new_bucket->data, trace_len) == 0) {
			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
			return id;
		}
		if (bucket && !(flags & BPF_F_REUSE_STACKID)) {
			pcpu_freelist_push(&smap->freelist, &new_bucket->fnode);
			return -EEXIST;
		}
	} else {
		if (hash_matches && bucket->nr == trace_nr &&
		    memcmp(bucket->data, ips, trace_len) == 0)
			return id;
		if (bucket && !(flags & BPF_F_REUSE_STACKID))
			return -EEXIST;

		new_bucket = (struct stack_map_bucket *)
			pcpu_freelist_pop(&smap->freelist);
		if (unlikely(!new_bucket))
			return -ENOMEM;
		memcpy(new_bucket->data, ips, trace_len);
	}

	new_bucket->hash = hash;
	new_bucket->nr = trace_nr;

	old_bucket = xchg(&smap->buckets[id], new_bucket);
	if (old_bucket)
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
	return id;
}

BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
	   u64, flags)
{
	u32 max_depth = map->value_size / stack_map_data_size(map);
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
		return -EINVAL;

	max_depth += skip;
	if (max_depth > sysctl_perf_event_max_stack)
		max_depth = sysctl_perf_event_max_stack;

	trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
				   false, false);

	if (unlikely(!trace))
		/* couldn't fetch the stack trace */
		return -EFAULT;

	return __bpf_get_stackid(map, trace, flags);
}

const struct bpf_func_proto bpf_get_stackid_proto = {
	.func		= bpf_get_stackid,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

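/* Count the leading kernel entries in a callchain; user entries start
 * after the PERF_CONTEXT_USER marker.
 */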
static __u64 count_kernel_ip(struct perf_callchain_entry *trace)
{
	__u64 nr_kernel = 0;

	while (nr_kernel < trace->nr) {
		if (trace->ip[nr_kernel] == PERF_CONTEXT_USER)
			break;
		nr_kernel++;
	}
	return nr_kernel;
}

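/* Variant of bpf_get_stackid() for perf_event programs whose sample
 * already carries a callchain (__PERF_SAMPLE_CALLCHAIN_EARLY). The
 * precomputed callchain is trimmed to its kernel or user portion before
 * being hashed into the map.
 */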
BPF_CALL_3(bpf_get_stackid_pe, struct bpf_perf_event_data_kern *, ctx,
	   struct bpf_map *, map, u64, flags)
{
	struct perf_event *event = ctx->event;
	struct perf_callchain_entry *trace;
	bool kernel, user;
	__u64 nr_kernel;
	int ret;

	/* perf_sample_data doesn't have callchain, use bpf_get_stackid */
	if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
		return bpf_get_stackid((unsigned long)(ctx->regs),
				       (unsigned long) map, flags, 0, 0);

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_FAST_STACK_CMP | BPF_F_REUSE_STACKID)))
		return -EINVAL;

	user = flags & BPF_F_USER_STACK;
	kernel = !user;

	trace = ctx->data->callchain;
	if (unlikely(!trace))
		return -EFAULT;

	nr_kernel = count_kernel_ip(trace);

	if (kernel) {
		__u64 nr = trace->nr;

		trace->nr = nr_kernel;
		ret = __bpf_get_stackid(map, trace, flags);

		/* restore nr */
		trace->nr = nr;
	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			return -EFAULT;

		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		ret = __bpf_get_stackid(map, trace, flags);
	}
	return ret;
}

const struct bpf_func_proto bpf_get_stackid_proto_pe = {
	.func		= bpf_get_stackid_pe,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_CONST_MAP_PTR,
	.arg3_type	= ARG_ANYTHING,
};

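/* Common helper behind bpf_get_stack(), bpf_get_task_stack() and
 * bpf_get_stack_pe(): capture (or reuse) a callchain, optionally resolve
 * build_id+offset, copy it into the supplied buffer and zero the
 * remainder. Returns the number of bytes copied or a negative error.
 */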
static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
			    struct perf_callchain_entry *trace_in,
			    void *buf, u32 size, u64 flags)
{
	u32 trace_nr, copy_len, elem_size, num_elem, max_depth;
	bool user_build_id = flags & BPF_F_USER_BUILD_ID;
	u32 skip = flags & BPF_F_SKIP_FIELD_MASK;
	bool user = flags & BPF_F_USER_STACK;
	struct perf_callchain_entry *trace;
	bool kernel = !user;
	int err = -EINVAL;
	u64 *ips;

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;
	if (kernel && user_build_id)
		goto clear;

	elem_size = (user && user_build_id) ? sizeof(struct bpf_stack_build_id)
					    : sizeof(u64);
	if (unlikely(size % elem_size))
		goto clear;

	/* cannot get valid user stack for task without user_mode regs */
	if (task && user && !user_mode(regs))
		goto err_fault;

	num_elem = size / elem_size;
	max_depth = num_elem + skip;
	if (sysctl_perf_event_max_stack < max_depth)
		max_depth = sysctl_perf_event_max_stack;

	if (trace_in)
		trace = trace_in;
	else if (kernel && task)
		trace = get_callchain_entry_for_task(task, max_depth);
	else
		trace = get_perf_callchain(regs, 0, kernel, user, max_depth,
					   false, false);
	if (unlikely(!trace))
		goto err_fault;

	if (trace->nr < skip)
		goto err_fault;

	trace_nr = trace->nr - skip;
	trace_nr = (trace_nr <= num_elem) ? trace_nr : num_elem;
	copy_len = trace_nr * elem_size;

	ips = trace->ip + skip;
	if (user && user_build_id)
		stack_map_get_build_id_offset(buf, ips, trace_nr, user);
	else
		memcpy(buf, ips, copy_len);

	if (size > copy_len)
		memset(buf + copy_len, 0, size - copy_len);
	return copy_len;

err_fault:
	err = -EFAULT;
clear:
	memset(buf, 0, size);
	return err;
}

BPF_CALL_4(bpf_get_stack, struct pt_regs *, regs, void *, buf, u32, size,
	   u64, flags)
{
	return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);
}

const struct bpf_func_proto bpf_get_stack_proto = {
	.func		= bpf_get_stack,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

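/* Like bpf_get_stack(), but for an arbitrary task: pin the task's stack,
 * take a snapshot from its saved pt_regs and release the stack again.
 */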
BPF_CALL_4(bpf_get_task_stack, struct task_struct *, task, void *, buf,
	   u32, size, u64, flags)
{
	struct pt_regs *regs;
	long res = -EINVAL;

	if (!try_get_task_stack(task))
		return -EFAULT;

	regs = task_pt_regs(task);
	if (regs)
		res = __bpf_get_stack(regs, task, NULL, buf, size, flags);
	put_task_stack(task);

	return res;
}

BTF_ID_LIST_SINGLE(bpf_get_task_stack_btf_ids, struct, task_struct)

const struct bpf_func_proto bpf_get_task_stack_proto = {
	.func		= bpf_get_task_stack,
	.gpl_only	= false,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_BTF_ID,
	.arg1_btf_id	= &bpf_get_task_stack_btf_ids[0],
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

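/* Variant of bpf_get_stack() for perf_event programs: reuse the callchain
 * collected at sample time when __PERF_SAMPLE_CALLCHAIN_EARLY is set,
 * trimming it to the requested kernel or user portion.
 */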
BPF_CALL_4(bpf_get_stack_pe, struct bpf_perf_event_data_kern *, ctx,
	   void *, buf, u32, size, u64, flags)
{
	struct pt_regs *regs = (struct pt_regs *)(ctx->regs);
	struct perf_event *event = ctx->event;
	struct perf_callchain_entry *trace;
	bool kernel, user;
	int err = -EINVAL;
	__u64 nr_kernel;

	if (!(event->attr.sample_type & __PERF_SAMPLE_CALLCHAIN_EARLY))
		return __bpf_get_stack(regs, NULL, NULL, buf, size, flags);

	if (unlikely(flags & ~(BPF_F_SKIP_FIELD_MASK | BPF_F_USER_STACK |
			       BPF_F_USER_BUILD_ID)))
		goto clear;

	user = flags & BPF_F_USER_STACK;
	kernel = !user;

	err = -EFAULT;
	trace = ctx->data->callchain;
	if (unlikely(!trace))
		goto clear;

	nr_kernel = count_kernel_ip(trace);

	if (kernel) {
		__u64 nr = trace->nr;

		trace->nr = nr_kernel;
		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);

		/* restore nr */
		trace->nr = nr;
	} else { /* user */
		u64 skip = flags & BPF_F_SKIP_FIELD_MASK;

		skip += nr_kernel;
		if (skip > BPF_F_SKIP_FIELD_MASK)
			goto clear;

		flags = (flags & ~BPF_F_SKIP_FIELD_MASK) | skip;
		err = __bpf_get_stack(regs, NULL, trace, buf, size, flags);
	}
	return err;

clear:
	memset(buf, 0, size);
	return err;
}

const struct bpf_func_proto bpf_get_stack_proto_pe = {
	.func		= bpf_get_stack_pe,
	.gpl_only	= true,
	.ret_type	= RET_INTEGER,
	.arg1_type	= ARG_PTR_TO_CTX,
	.arg2_type	= ARG_PTR_TO_UNINIT_MEM,
	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
	.arg4_type	= ARG_ANYTHING,
};

/* Called from eBPF program */
static void *stack_map_lookup_elem(struct bpf_map *map, void *key)
{
	return ERR_PTR(-EOPNOTSUPP);
}

/* Called from syscall */
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *bucket, *old_bucket;
	u32 id = *(u32 *)key, trace_len;

	if (unlikely(id >= smap->n_buckets))
		return -ENOENT;

	bucket = xchg(&smap->buckets[id], NULL);
	if (!bucket)
		return -ENOENT;

	trace_len = bucket->nr * stack_map_data_size(map);
	memcpy(value, bucket->data, trace_len);
	memset(value + trace_len, 0, map->value_size - trace_len);

	old_bucket = xchg(&smap->buckets[id], bucket);
	if (old_bucket)
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
	return 0;
}

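/* Called from syscall; advance to the next populated bucket id */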
static int stack_map_get_next_key(struct bpf_map *map, void *key,
				  void *next_key)
{
	struct bpf_stack_map *smap = container_of(map,
						  struct bpf_stack_map, map);
	u32 id;

	WARN_ON_ONCE(!rcu_read_lock_held());

	if (!key) {
		id = 0;
	} else {
		id = *(u32 *)key;
		if (id >= smap->n_buckets || !smap->buckets[id])
			id = 0;
		else
			id++;
	}

	while (id < smap->n_buckets && !smap->buckets[id])
		id++;

	if (id >= smap->n_buckets)
		return -ENOENT;

	*(u32 *)next_key = id;
	return 0;
}

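/* Stack ids are only created by bpf_get_stackid(); direct updates are
 * not supported.
 */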
static int stack_map_update_elem(struct bpf_map *map, void *key, void *value,
				 u64 map_flags)
{
	return -EINVAL;
}

/* Called from syscall or from eBPF program */
static int stack_map_delete_elem(struct bpf_map *map, void *key)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);
	struct stack_map_bucket *old_bucket;
	u32 id = *(u32 *)key;

	if (unlikely(id >= smap->n_buckets))
		return -E2BIG;

	old_bucket = xchg(&smap->buckets[id], NULL);
	if (old_bucket) {
		pcpu_freelist_push(&smap->freelist, &old_bucket->fnode);
		return 0;
	} else {
		return -ENOENT;
	}
}

/* Called when map->refcnt goes to zero, either from workqueue or from syscall */
static void stack_map_free(struct bpf_map *map)
{
	struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map);

	bpf_map_area_free(smap->elems);
	pcpu_freelist_destroy(&smap->freelist);
	bpf_map_area_free(smap);
	put_callchain_buffers();
}

static int stack_trace_map_btf_id;
const struct bpf_map_ops stack_trace_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc = stack_map_alloc,
	.map_free = stack_map_free,
	.map_get_next_key = stack_map_get_next_key,
	.map_lookup_elem = stack_map_lookup_elem,
	.map_update_elem = stack_map_update_elem,
	.map_delete_elem = stack_map_delete_elem,
	.map_check_btf = map_check_no_btf,
	.map_btf_name = "bpf_stack_map",
	.map_btf_id = &stack_trace_map_btf_id,
};

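/* Set up the per-cpu irq_work used by do_up_read() to drop the mmap lock
 * from contexts where it cannot be released directly.
 */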
static int __init stack_map_init(void)
{
	int cpu;
	struct stack_map_irq_work *work;

	for_each_possible_cpu(cpu) {
		work = per_cpu_ptr(&up_read_work, cpu);
		init_irq_work(&work->irq_work, do_up_read);
	}
	return 0;
}
subsys_initcall(stack_map_init);