// SPDX-License-Identifier: GPL-2.0
/* Support for MMIO probes.
 * Borrows much code from kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/errno.h>
#include <asm/debugreg.h>
#include <linux/mmiotrace.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)

struct kmmio_fault_page {
        struct list_head list;
        struct kmmio_fault_page *release_next;
        unsigned long addr; /* the requested address */
        pteval_t old_presence; /* page presence prior to arming */
        bool armed;

        /*
         * Number of times this page has been registered as a part
         * of a probe. If zero, page is disarmed and this may be freed.
         * Used only by writers (RCU) and post_kmmio_handler().
         * Protected by kmmio_lock, when linked into kmmio_page_table.
         */
        int count;

        bool scheduled_for_release;
};

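/* A batch of disarmed fault pages queued for removal and freeing via RCU. */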
struct kmmio_delayed_release {
        struct rcu_head rcu;
        struct kmmio_fault_page *release_list;
};

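/*
 * Per-CPU state for a probe hit that is in flight between the page fault
 * (kmmio_handler) and the following debug trap (post_kmmio_handler).
 */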
struct kmmio_context {
        struct kmmio_fault_page *fpage;
        struct kmmio_probe *probe;
        unsigned long saved_flags;
        unsigned long addr;
        int active;
};

static DEFINE_SPINLOCK(kmmio_lock);

/* Protected by kmmio_lock */
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

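/*
 * Find the hash bucket for the page containing addr, or return NULL if
 * addr is not mapped at all.
 */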
static struct list_head *kmmio_page_list(unsigned long addr)
{
        unsigned int l;
        pte_t *pte = lookup_address(addr, &l);

        if (!pte)
                return NULL;
        addr &= page_level_mask(l);

        return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/*
 * This is basically a dynamic stabbing problem. The existing prio tree
 * code could be used, or possibly better implementations:
 * "The Interval Skip List: A Data Structure for Finding All Intervals That
 * Overlap a Point" (might be simple)
 * "Space Efficient Dynamic Stabbing with Fast Queries" - Mikkel Thorup
 */
/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
        struct kmmio_probe *p;
        list_for_each_entry_rcu(p, &kmmio_probes, list) {
                if (addr >= p->addr && addr < (p->addr + p->len))
                        return p;
        }
        return NULL;
}

/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
{
        struct list_head *head;
        struct kmmio_fault_page *f;
        unsigned int l;
        pte_t *pte = lookup_address(addr, &l);

        if (!pte)
                return NULL;
        addr &= page_level_mask(l);
        head = kmmio_page_list(addr);
        list_for_each_entry_rcu(f, head, list) {
                if (f->addr == addr)
                        return f;
        }
        return NULL;
}

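/*
 * Save and clear, or restore, the presence of a mapping at PMD (2M page)
 * or PTE (4K page) level. On clear, the previous value is stashed in *old
 * so that the restore path can put it back.
 */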
static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
{
        pmd_t new_pmd;
        pmdval_t v = pmd_val(*pmd);
        if (clear) {
                *old = v;
                new_pmd = pmd_mkinvalid(*pmd);
        } else {
                /* Presume this has been called with clear==true previously */
                new_pmd = __pmd(*old);
        }
        set_pmd(pmd, new_pmd);
}

static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
{
        pteval_t v = pte_val(*pte);
        if (clear) {
                *old = v;
                /* Nothing should care about address */
                pte_clear(&init_mm, 0, pte);
        } else {
                /* Presume this has been called with clear==true previously */
                set_pte_atomic(pte, __pte(*old));
        }
}

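/*
 * Clear or restore the presence of the page backing f->addr at whatever
 * level it is mapped, then flush its TLB entry.
 */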
static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
        unsigned int level;
        pte_t *pte = lookup_address(f->addr, &level);

        if (!pte) {
                pr_err("no pte for addr 0x%08lx\n", f->addr);
                return -1;
        }

        switch (level) {
        case PG_LEVEL_2M:
                clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
                break;
        case PG_LEVEL_4K:
                clear_pte_presence(pte, clear, &f->old_presence);
                break;
        default:
                pr_err("unexpected page level 0x%x.\n", level);
                return -1;
        }

        flush_tlb_one_kernel(f->addr);
        return 0;
}

/*
 * Mark the given page as not present. Access to it will trigger a fault.
 *
 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
 * protection is ignored here. RCU read lock is assumed held, so the struct
 * will not disappear unexpectedly. Furthermore, the caller must guarantee
 * that double arming the same virtual address (page) cannot occur.
 *
 * Double disarming on the other hand is allowed, and may occur when a fault
 * and mmiotrace shutdown happen simultaneously.
 */
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
        int ret;
        WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
        if (f->armed) {
                pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
                        f->addr, f->count, !!f->old_presence);
        }
        ret = clear_page_presence(f, true);
        WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
                  f->addr);
        f->armed = true;
        return ret;
}

/* Restore the given page to saved presence state. */
static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
        int ret = clear_page_presence(f, false);
        WARN_ONCE(ret < 0,
                  KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
        f->armed = false;
}

/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could already be executing within
 * a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry, as the page fault is delivered through
 * an interrupt gate, and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
        struct kmmio_context *ctx;
        struct kmmio_fault_page *faultpage;
        int ret = 0; /* default to fault not handled */
        unsigned long page_base = addr;
        unsigned int l;
        pte_t *pte = lookup_address(addr, &l);
        if (!pte)
                return -EINVAL;
        page_base &= page_level_mask(l);

        /*
         * Preemption is now disabled to prevent process switch during
         * single stepping. We can only handle one active kmmio trace
         * per cpu, so ensure that we finish it before something else
         * gets to run. We also hold the RCU read lock over single
         * stepping to avoid looking up the probe and kmmio_fault_page
         * again.
         */
        preempt_disable();
        rcu_read_lock();

        faultpage = get_kmmio_fault_page(page_base);
        if (!faultpage) {
                /*
                 * Either this page fault is not caused by kmmio, or
                 * another CPU just pulled the kmmio probe from under
                 * our feet. The latter case should not be possible.
                 */
                goto no_kmmio;
        }

        ctx = this_cpu_ptr(&kmmio_ctx);
        if (ctx->active) {
                if (page_base == ctx->addr) {
                        /*
                         * A second fault on the same page means some other
                         * condition needs handling by do_page_fault(); the
                         * most common is that the page really is not present.
                         */
                        pr_debug("secondary hit for 0x%08lx CPU %d.\n",
                                 addr, smp_processor_id());

                        if (!faultpage->old_presence)
                                pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
                                        addr, smp_processor_id());
                } else {
                        /*
                         * Prevent overwriting already in-flight context.
                         * This should not happen, let's hope disarming at
                         * least prevents a panic.
                         */
                        pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
                                 smp_processor_id(), addr);
                        pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
                        disarm_kmmio_fault_page(faultpage);
                }
                goto no_kmmio;
        }
        ctx->active++;

        ctx->fpage = faultpage;
        ctx->probe = get_kmmio_probe(page_base);
        ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
        ctx->addr = page_base;

        if (ctx->probe && ctx->probe->pre_handler)
                ctx->probe->pre_handler(ctx->probe, regs, addr);

        /*
         * Enable single-stepping and disable interrupts for the faulting
         * context. Local interrupts must not get enabled during stepping.
         */
        regs->flags |= X86_EFLAGS_TF;
        regs->flags &= ~X86_EFLAGS_IF;

        /* Now we set the present bit in the PTE and single step. */
        disarm_kmmio_fault_page(ctx->fpage);

        /*
         * If another cpu accesses the same page while we are stepping,
         * the access will not be caught. It will simply succeed and the
         * only downside is we lose the event. If this becomes a problem,
         * the user should drop to single cpu before tracing.
         */

        return 1; /* fault handled */

no_kmmio:
        rcu_read_unlock();
        preempt_enable_no_resched();
        return ret;
}

/*
 * Interrupts are disabled on entry as trap1 is an interrupt gate
 * and they remain disabled throughout this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
        int ret = 0;
        struct kmmio_context *ctx = this_cpu_ptr(&kmmio_ctx);

        if (!ctx->active) {
                /*
                 * Debug traps without an active context are due to either
                 * something external causing them (e.g. using a debugger
                 * while mmio tracing is enabled), or erroneous behaviour.
                 */
                pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
                goto out;
        }

        if (ctx->probe && ctx->probe->post_handler)
                ctx->probe->post_handler(ctx->probe, condition, regs);

        /* Prevent racing against release_kmmio_fault_page(). */
        spin_lock(&kmmio_lock);
        if (ctx->fpage->count)
                arm_kmmio_fault_page(ctx->fpage);
        spin_unlock(&kmmio_lock);

        regs->flags &= ~X86_EFLAGS_TF;
        regs->flags |= ctx->saved_flags;

        /* These were acquired in kmmio_handler(). */
        ctx->active--;
        BUG_ON(ctx->active);
        rcu_read_unlock();
        preempt_enable_no_resched();

        /*
         * If somebody else is single-stepping across a probe point, flags
         * will have TF set, in which case, continue the remaining processing
         * of do_debug, as if this is not a probe hit.
         */
        if (!(regs->flags & X86_EFLAGS_TF))
                ret = 1;
out:
        return ret;
}

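/*
 * Take a reference on the fault page covering addr, arming it on the
 * 0 -> 1 transition. A new kmmio_fault_page is allocated if none exists yet.
 */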
/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long addr)
{
        struct kmmio_fault_page *f;

        f = get_kmmio_fault_page(addr);
        if (f) {
                if (!f->count)
                        arm_kmmio_fault_page(f);
                f->count++;
                return 0;
        }

        f = kzalloc(sizeof(*f), GFP_ATOMIC);
        if (!f)
                return -1;

        f->count = 1;
        f->addr = addr;

        if (arm_kmmio_fault_page(f)) {
                kfree(f);
                return -1;
        }

        list_add_rcu(&f->list, kmmio_page_list(f->addr));

        return 0;
}

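/*
 * Drop a reference on the fault page covering addr. On the last reference
 * the page is disarmed and queued on *release_list for delayed removal.
 */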
/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long addr,
                                     struct kmmio_fault_page **release_list)
{
        struct kmmio_fault_page *f;

        f = get_kmmio_fault_page(addr);
        if (!f)
                return;

        f->count--;
        BUG_ON(f->count < 0);
        if (!f->count) {
                disarm_kmmio_fault_page(f);
                if (!f->scheduled_for_release) {
                        f->release_next = *release_list;
                        *release_list = f;
                        f->scheduled_for_release = true;
                }
        }
}

/*
 * With page-unaligned ioremaps, one or two armed pages may contain
 * addresses from outside the intended mapping. Events for these addresses
 * are currently silently dropped. The events may result only from programming
 * mistakes by accessing addresses before the beginning or past the end of a
 * mapping.
 */
int register_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long flags;
        int ret = 0;
        unsigned long size = 0;
        unsigned long addr = p->addr & PAGE_MASK;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
        unsigned int l;
        pte_t *pte;

        spin_lock_irqsave(&kmmio_lock, flags);
        if (get_kmmio_probe(addr)) {
                ret = -EEXIST;
                goto out;
        }

        pte = lookup_address(addr, &l);
        if (!pte) {
                ret = -EINVAL;
                goto out;
        }

        kmmio_count++;
        list_add_rcu(&p->list, &kmmio_probes);
        while (size < size_lim) {
                if (add_kmmio_fault_page(addr + size))
                        pr_err("Unable to set page fault.\n");
                size += page_level_size(l);
        }
out:
        spin_unlock_irqrestore(&kmmio_lock, flags);
        /*
         * XXX: What should I do here?
         * Here was a call to global_flush_tlb(), but it does not exist
         * anymore. It seems it's not needed after all.
         */
        return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);

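/*
 * Illustrative sketch only (not part of this file): roughly how a caller
 * could use the registration interface above. The handler and variable
 * names here are hypothetical; the struct kmmio_probe fields and handler
 * signatures are the ones declared in <linux/mmiotrace.h>, and mmiotrace
 * is the real in-tree user.
 *
 *      static void sample_pre(struct kmmio_probe *p, struct pt_regs *regs,
 *                             unsigned long addr)
 *      {
 *              // Page is unmapped; record the faulting MMIO access.
 *      }
 *
 *      static void sample_post(struct kmmio_probe *p, unsigned long cond,
 *                              struct pt_regs *regs)
 *      {
 *              // Called from the debug trap after the single step.
 *      }
 *
 *      static struct kmmio_probe sample_probe = {
 *              .len = PAGE_SIZE,
 *              .pre_handler = sample_pre,
 *              .post_handler = sample_post,
 *      };
 *
 *      sample_probe.addr = (unsigned long)ioremapped_vaddr;
 *      register_kmmio_probe(&sample_probe);
 *      // ...traced MMIO accesses happen here...
 *      unregister_kmmio_probe(&sample_probe);
 *      synchronize_rcu();  // before reusing sample_probe, see the comment
 *                          // above unregister_kmmio_probe()
 */

/*
 * Second stage of the delayed release: by now no CPU can still reference
 * these fault pages, so the structs can finally be freed.
 */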
static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr = container_of(
                                                head,
                                                struct kmmio_delayed_release,
                                                rcu);
        struct kmmio_fault_page *f = dr->release_list;
        while (f) {
                struct kmmio_fault_page *next = f->release_next;
                BUG_ON(f->count);
                kfree(f);
                f = next;
        }
        kfree(dr);
}

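/*
 * First stage of the delayed release: unlink the disarmed fault pages from
 * kmmio_page_table, drop any that were re-referenced in the meantime, and
 * schedule the final free after another RCU grace period.
 */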
static void remove_kmmio_fault_pages(struct rcu_head *head)
{
        struct kmmio_delayed_release *dr =
                container_of(head, struct kmmio_delayed_release, rcu);
        struct kmmio_fault_page *f = dr->release_list;
        struct kmmio_fault_page **prevp = &dr->release_list;
        unsigned long flags;

        spin_lock_irqsave(&kmmio_lock, flags);
        while (f) {
                if (!f->count) {
                        list_del_rcu(&f->list);
                        prevp = &f->release_next;
                } else {
                        *prevp = f->release_next;
                        f->release_next = NULL;
                        f->scheduled_for_release = false;
                }
                f = *prevp;
        }
        spin_unlock_irqrestore(&kmmio_lock, flags);

        /* This is the real RCU destroy call. */
        call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}

/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore. Only after that
 * you may actually release your struct kmmio_probe.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs, after another grace period.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
        unsigned long flags;
        unsigned long size = 0;
        unsigned long addr = p->addr & PAGE_MASK;
        const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
        struct kmmio_fault_page *release_list = NULL;
        struct kmmio_delayed_release *drelease;
        unsigned int l;
        pte_t *pte;

        pte = lookup_address(addr, &l);
        if (!pte)
                return;

        spin_lock_irqsave(&kmmio_lock, flags);
        while (size < size_lim) {
                release_kmmio_fault_page(addr + size, &release_list);
                size += page_level_size(l);
        }
        list_del_rcu(&p->list);
        kmmio_count--;
        spin_unlock_irqrestore(&kmmio_lock, flags);

        if (!release_list)
                return;

        drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
        if (!drelease) {
                pr_crit("leaking kmmio_fault_page objects.\n");
                return;
        }
        drelease->release_list = release_list;

        /*
         * This is not really RCU here. We have just disarmed a set of
         * pages so that they cannot trigger page faults anymore. However,
         * we cannot remove the pages from kmmio_page_table,
         * because a probe hit might be in flight on another CPU. The
         * pages are collected into a list, and they will be removed from
         * kmmio_page_table when it is certain that no probe hit related to
         * these pages can be in flight. RCU grace period sounds like a
         * good choice.
         *
         * If we removed the pages too early, the kmmio page fault handler
         * might not find the respective kmmio_fault_page and determine it's
         * not a kmmio fault, when it actually is. This would lead to madness.
         */
        call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);

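/*
 * Die notifier: route single-step debug traps (DR_STEP set in DR6) to
 * post_kmmio_handler() and swallow the trap if it was ours.
 */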
static int
kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
{
        struct die_args *arg = args;
        unsigned long *dr6_p = (unsigned long *)ERR_PTR(arg->err);

        if (val == DIE_DEBUG && (*dr6_p & DR_STEP))
                if (post_kmmio_handler(*dr6_p, arg->regs) == 1) {
                        /*
                         * Reset the BS bit in dr6 (pointed to by args->err)
                         * to denote completion of processing.
                         */
                        *dr6_p &= ~DR_STEP;
                        return NOTIFY_STOP;
                }

        return NOTIFY_DONE;
}

static struct notifier_block nb_die = {
        .notifier_call = kmmio_die_notifier
};

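/* Initialize the fault page hash table and hook into the die notifier chain. */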
int kmmio_init(void)
{
        int i;

        for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
                INIT_LIST_HEAD(&kmmio_page_table[i]);

        return register_die_notifier(&nb_die);
}

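/*
 * Unhook the die notifier and warn if any fault pages are still hashed,
 * i.e. probes were never unregistered.
 */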
void kmmio_cleanup(void)
{
        int i;

        unregister_die_notifier(&nb_die);
        for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
                WARN_ONCE(!list_empty(&kmmio_page_table[i]),
                          KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
        }
}