Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0
/* Support for MMIO probes.
 * Borrows much code from kprobes.
 * (C) 2002 Louis Zhuang <louis.zhuang@intel.com>.
 *     2007 Alexander Eichner
 *     2008 Pekka Paalanen <pq@iki.fi>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/spinlock.h>
#include <linux/hash.h>
#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/uaccess.h>
#include <linux/ptrace.h>
#include <linux/preempt.h>
#include <linux/percpu.h>
#include <linux/kdebug.h>
#include <linux/mutex.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <linux/errno.h>
#include <asm/debugreg.h>
#include <linux/mmiotrace.h>

#define KMMIO_PAGE_HASH_BITS 4
#define KMMIO_PAGE_TABLE_SIZE (1 << KMMIO_PAGE_HASH_BITS)
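/*
 * 1 << KMMIO_PAGE_HASH_BITS = 16 hash buckets; fault pages are hashed by the
 * base address of the (possibly huge) page, see kmmio_page_list() below.
 */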

struct kmmio_fault_page {
	struct list_head list;
	struct kmmio_fault_page *release_next;
	unsigned long addr; /* the requested address */
	pteval_t old_presence; /* page presence prior to arming */
	bool armed;

	/*
	 * Number of times this page has been registered as a part
	 * of a probe. If zero, page is disarmed and this may be freed.
	 * Used only by writers (RCU) and post_kmmio_handler().
	 * Protected by kmmio_lock, when linked into kmmio_page_table.
	 */
	int count;

	bool scheduled_for_release;
};

struct kmmio_delayed_release {
	struct rcu_head rcu;
	struct kmmio_fault_page *release_list;
};

struct kmmio_context {
	struct kmmio_fault_page *fpage;
	struct kmmio_probe *probe;
	unsigned long saved_flags;
	unsigned long addr;
	int active;
};

static DEFINE_SPINLOCK(kmmio_lock);

/* Protected by kmmio_lock */
unsigned int kmmio_count;

/* Read-protected by RCU, write-protected by kmmio_lock. */
static struct list_head kmmio_page_table[KMMIO_PAGE_TABLE_SIZE];
static LIST_HEAD(kmmio_probes);

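/*
 * Return the hash bucket for the (possibly huge) page containing addr,
 * or NULL if addr is not mapped at all.
 */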
static struct list_head *kmmio_page_list(unsigned long addr)
{
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);

	if (!pte)
		return NULL;
	addr &= page_level_mask(l);

	return &kmmio_page_table[hash_long(addr, KMMIO_PAGE_HASH_BITS)];
}

/* Accessed per-cpu */
static DEFINE_PER_CPU(struct kmmio_context, kmmio_ctx);

/*
 * Finding the probe covering an address is basically a dynamic stabbing
 * problem. Possible better implementations than the linear list below:
 * - the existing prio tree code, or
 * - "The Interval Skip List: A Data Structure for Finding All Intervals
 *   That Overlap a Point" (might be simple), or
 * - "Space Efficient Dynamic Stabbing with Fast Queries" - Mikkel Thorup.
 */
/* Get the kmmio at this addr (if any). You must be holding RCU read lock. */
static struct kmmio_probe *get_kmmio_probe(unsigned long addr)
{
	struct kmmio_probe *p;
	list_for_each_entry_rcu(p, &kmmio_probes, list) {
		if (addr >= p->addr && addr < (p->addr + p->len))
			return p;
	}
	return NULL;
}

/* You must be holding RCU read lock. */
static struct kmmio_fault_page *get_kmmio_fault_page(unsigned long addr)
{
	struct list_head *head;
	struct kmmio_fault_page *f;
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);

	if (!pte)
		return NULL;
	addr &= page_level_mask(l);
	head = kmmio_page_list(addr);
	list_for_each_entry_rcu(f, head, list) {
		if (f->addr == addr)
			return f;
	}
	return NULL;
}

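/*
 * Helpers for clear_page_presence(): save the current pmd/pte value and mark
 * the entry not-present (clear == true), or write the saved value back
 * (clear == false).
 */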
static void clear_pmd_presence(pmd_t *pmd, bool clear, pmdval_t *old)
{
	pmd_t new_pmd;
	pmdval_t v = pmd_val(*pmd);
	if (clear) {
		*old = v;
		new_pmd = pmd_mkinvalid(*pmd);
	} else {
		/* Presume this has been called with clear==true previously */
		new_pmd = __pmd(*old);
	}
	set_pmd(pmd, new_pmd);
}

static void clear_pte_presence(pte_t *pte, bool clear, pteval_t *old)
{
	pteval_t v = pte_val(*pte);
	if (clear) {
		*old = v;
		/* Nothing should care about address */
		pte_clear(&init_mm, 0, pte);
	} else {
		/* Presume this has been called with clear==true previously */
		set_pte_atomic(pte, __pte(*old));
	}
}

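/*
 * Mark the page containing f->addr not-present (clear == true) or restore
 * the saved entry (clear == false), then flush the TLB entry for that
 * address. Returns 0 on success, -1 if the address has no mapping or an
 * unexpected page level.
 */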
static int clear_page_presence(struct kmmio_fault_page *f, bool clear)
{
	unsigned int level;
	pte_t *pte = lookup_address(f->addr, &level);

	if (!pte) {
		pr_err("no pte for addr 0x%08lx\n", f->addr);
		return -1;
	}

	switch (level) {
	case PG_LEVEL_2M:
		clear_pmd_presence((pmd_t *)pte, clear, &f->old_presence);
		break;
	case PG_LEVEL_4K:
		clear_pte_presence(pte, clear, &f->old_presence);
		break;
	default:
		pr_err("unexpected page level 0x%x.\n", level);
		return -1;
	}

	flush_tlb_one_kernel(f->addr);
	return 0;
}

/*
 * Mark the given page as not present. Access to it will trigger a fault.
 *
 * Struct kmmio_fault_page is protected by RCU and kmmio_lock, but the
 * protection is ignored here. The RCU read lock is assumed held, so the
 * struct will not disappear unexpectedly. Furthermore, the caller must
 * guarantee that double arming of the same virtual address (page) cannot
 * occur.
 *
 * Double disarming on the other hand is allowed, and may occur when a fault
 * and mmiotrace shutdown happen simultaneously.
 */
static int arm_kmmio_fault_page(struct kmmio_fault_page *f)
{
	int ret;
	WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n"));
	if (f->armed) {
		pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n",
			f->addr, f->count, !!f->old_presence);
	}
	ret = clear_page_presence(f, true);
	WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"),
		  f->addr);
	f->armed = true;
	return ret;
}

/* Restore the given page to the saved presence state. */
static void disarm_kmmio_fault_page(struct kmmio_fault_page *f)
{
	int ret = clear_page_presence(f, false);
	WARN_ONCE(ret < 0,
			KERN_ERR "kmmio disarming at 0x%08lx failed.\n", f->addr);
	f->armed = false;
}

/*
 * This is being called from do_page_fault().
 *
 * We may be in an interrupt or a critical section. Also prefetching may
 * trigger a page fault. We may be in the middle of a process switch.
 * We cannot take any locks, because we could already be executing inside
 * a kmmio critical section.
 *
 * Local interrupts are disabled, so preemption cannot happen.
 * Do not enable interrupts, do not sleep, and watch out for other CPUs.
 */
/*
 * Interrupts are disabled on entry, as the page fault is delivered through
 * an interrupt gate, and they remain disabled throughout this function.
 */
int kmmio_handler(struct pt_regs *regs, unsigned long addr)
{
	struct kmmio_context *ctx;
	struct kmmio_fault_page *faultpage;
	int ret = 0; /* default to fault not handled */
	unsigned long page_base = addr;
	unsigned int l;
	pte_t *pte = lookup_address(addr, &l);
	if (!pte)
		return -EINVAL;
	page_base &= page_level_mask(l);

	/*
	 * Preemption is now disabled to prevent process switch during
	 * single stepping. We can only handle one active kmmio trace
	 * per cpu, so ensure that we finish it before something else
	 * gets to run. We also hold the RCU read lock over single
	 * stepping to avoid looking up the probe and kmmio_fault_page
	 * again.
	 */
	preempt_disable();
	rcu_read_lock();

	faultpage = get_kmmio_fault_page(page_base);
	if (!faultpage) {
		/*
		 * Either this page fault is not caused by kmmio, or
		 * another CPU just pulled the kmmio probe from under
		 * our feet. The latter case should not be possible.
		 */
		goto no_kmmio;
	}

	ctx = this_cpu_ptr(&kmmio_ctx);
	if (ctx->active) {
		if (page_base == ctx->addr) {
			/*
			 * A second fault on the same page means some other
			 * condition needs handling by do_page_fault(); the
			 * page really not being present is the most common.
			 */
			pr_debug("secondary hit for 0x%08lx CPU %d.\n",
				 addr, smp_processor_id());

			if (!faultpage->old_presence)
				pr_info("unexpected secondary hit for address 0x%08lx on CPU %d.\n",
					addr, smp_processor_id());
		} else {
			/*
			 * Prevent overwriting already in-flight context.
			 * This should not happen, let's hope disarming at
			 * least prevents a panic.
			 */
			pr_emerg("recursive probe hit on CPU %d, for address 0x%08lx. Ignoring.\n",
				 smp_processor_id(), addr);
			pr_emerg("previous hit was at 0x%08lx.\n", ctx->addr);
			disarm_kmmio_fault_page(faultpage);
		}
		goto no_kmmio;
	}
	ctx->active++;

	ctx->fpage = faultpage;
	ctx->probe = get_kmmio_probe(page_base);
	ctx->saved_flags = (regs->flags & (X86_EFLAGS_TF | X86_EFLAGS_IF));
	ctx->addr = page_base;

	if (ctx->probe && ctx->probe->pre_handler)
		ctx->probe->pre_handler(ctx->probe, regs, addr);

	/*
	 * Enable single-stepping and disable interrupts for the faulting
	 * context. Local interrupts must not get enabled during stepping.
	 */
	regs->flags |= X86_EFLAGS_TF;
	regs->flags &= ~X86_EFLAGS_IF;

	/* Now we set the present bit in the PTE and single step. */
	disarm_kmmio_fault_page(ctx->fpage);

	/*
	 * If another CPU accesses the same page while we are stepping,
	 * the access will not be caught. It will simply succeed and the
	 * only downside is we lose the event. If this becomes a problem,
	 * the user should drop to a single CPU before tracing.
	 */

	return 1; /* fault handled */

no_kmmio:
	rcu_read_unlock();
	preempt_enable_no_resched();
	return ret;
}

/*
 * Interrupts are disabled on entry, as the debug exception (trap 1) is
 * delivered through an interrupt gate, and they remain disabled throughout
 * this function.
 * This must always get called as the pair to kmmio_handler().
 */
static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs)
{
	int ret = 0;
	struct kmmio_context *ctx = this_cpu_ptr(&kmmio_ctx);

	if (!ctx->active) {
		/*
		 * Debug traps without an active context are due to either
		 * something external causing them (e.g. using a debugger
		 * while mmio tracing is enabled), or erroneous behaviour.
		 */
		pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id());
		goto out;
	}

	if (ctx->probe && ctx->probe->post_handler)
		ctx->probe->post_handler(ctx->probe, condition, regs);

	/* Prevent racing against release_kmmio_fault_page(). */
	spin_lock(&kmmio_lock);
	if (ctx->fpage->count)
		arm_kmmio_fault_page(ctx->fpage);
	spin_unlock(&kmmio_lock);

	regs->flags &= ~X86_EFLAGS_TF;
	regs->flags |= ctx->saved_flags;

	/* These were acquired in kmmio_handler(). */
	ctx->active--;
	BUG_ON(ctx->active);
	rcu_read_unlock();
	preempt_enable_no_resched();

	/*
	 * If somebody else is single-stepping across a probe point, flags
	 * will have TF set, in which case continue the remaining processing
	 * of do_debug, as if this is not a probe hit.
	 */
	if (!(regs->flags & X86_EFLAGS_TF))
		ret = 1;
out:
	return ret;
}

/* You must be holding kmmio_lock. */
static int add_kmmio_fault_page(unsigned long addr)
{
	struct kmmio_fault_page *f;

	f = get_kmmio_fault_page(addr);
	if (f) {
		if (!f->count)
			arm_kmmio_fault_page(f);
		f->count++;
		return 0;
	}

	f = kzalloc(sizeof(*f), GFP_ATOMIC);
	if (!f)
		return -1;

	f->count = 1;
	f->addr = addr;

	if (arm_kmmio_fault_page(f)) {
		kfree(f);
		return -1;
	}

	list_add_rcu(&f->list, kmmio_page_list(f->addr));

	return 0;
}

/* You must be holding kmmio_lock. */
static void release_kmmio_fault_page(unsigned long addr,
				struct kmmio_fault_page **release_list)
{
	struct kmmio_fault_page *f;

	f = get_kmmio_fault_page(addr);
	if (!f)
		return;

	f->count--;
	BUG_ON(f->count < 0);
	if (!f->count) {
		disarm_kmmio_fault_page(f);
		if (!f->scheduled_for_release) {
			f->release_next = *release_list;
			*release_list = f;
			f->scheduled_for_release = true;
		}
	}
}

/*
 * With page-unaligned ioremaps, one or two armed pages may contain
 * addresses from outside the intended mapping. Events for these addresses
 * are currently silently dropped. Such events can only result from
 * programming mistakes that access addresses before the beginning or past
 * the end of a mapping.
 */
int register_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	int ret = 0;
	unsigned long size = 0;
	unsigned long addr = p->addr & PAGE_MASK;
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	unsigned int l;
	pte_t *pte;

	spin_lock_irqsave(&kmmio_lock, flags);
	if (get_kmmio_probe(addr)) {
		ret = -EEXIST;
		goto out;
	}

	pte = lookup_address(addr, &l);
	if (!pte) {
		ret = -EINVAL;
		goto out;
	}

	kmmio_count++;
	list_add_rcu(&p->list, &kmmio_probes);
	while (size < size_lim) {
		if (add_kmmio_fault_page(addr + size))
			pr_err("Unable to set page fault.\n");
		size += page_level_size(l);
	}
out:
	spin_unlock_irqrestore(&kmmio_lock, flags);
	/*
	 * XXX: What should I do here?
	 * Here was a call to global_flush_tlb(), but it does not exist
	 * anymore. It seems it's not needed after all.
	 */
	return ret;
}
EXPORT_SYMBOL(register_kmmio_probe);
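/*
 * Minimal illustrative sketch of the probe API (not part of this file).
 * Field names follow struct kmmio_probe in include/linux/mmiotrace.h;
 * my_pre()/my_post() and the mapped region are hypothetical:
 *
 *	static void my_pre(struct kmmio_probe *p, struct pt_regs *regs,
 *			   unsigned long addr)
 *	{ ... the faulting access to addr is about to be single-stepped ... }
 *
 *	static void my_post(struct kmmio_probe *p, unsigned long condition,
 *			    struct pt_regs *regs)
 *	{ ... the faulting instruction has now executed ... }
 *
 *	static struct kmmio_probe my_probe = {
 *		.addr		= (unsigned long)mmio_base,	// ioremap'd base
 *		.len		= mmio_size,
 *		.pre_handler	= my_pre,
 *		.post_handler	= my_post,
 *	};
 *
 *	register_kmmio_probe(&my_probe);
 *	...
 *	unregister_kmmio_probe(&my_probe);
 *	synchronize_rcu();	// only then may my_probe be freed
 */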

static void rcu_free_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr = container_of(
						head,
						struct kmmio_delayed_release,
						rcu);
	struct kmmio_fault_page *f = dr->release_list;
	while (f) {
		struct kmmio_fault_page *next = f->release_next;
		BUG_ON(f->count);
		kfree(f);
		f = next;
	}
	kfree(dr);
}

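/*
 * First RCU stage after unregistering: unlink fault pages that are still
 * unused from kmmio_page_table, drop pages that were re-used in the
 * meantime from the release list, and schedule the final free via another
 * RCU grace period.
 */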
static void remove_kmmio_fault_pages(struct rcu_head *head)
{
	struct kmmio_delayed_release *dr =
		container_of(head, struct kmmio_delayed_release, rcu);
	struct kmmio_fault_page *f = dr->release_list;
	struct kmmio_fault_page **prevp = &dr->release_list;
	unsigned long flags;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (f) {
		if (!f->count) {
			list_del_rcu(&f->list);
			prevp = &f->release_next;
		} else {
			*prevp = f->release_next;
			f->release_next = NULL;
			f->scheduled_for_release = false;
		}
		f = *prevp;
	}
	spin_unlock_irqrestore(&kmmio_lock, flags);

	/* This is the real RCU destroy call. */
	call_rcu(&dr->rcu, rcu_free_kmmio_fault_pages);
}

/*
 * Remove a kmmio probe. You have to synchronize_rcu() before you can be
 * sure that the callbacks will not be called anymore. Only after that
 * you may actually release your struct kmmio_probe.
 *
 * Unregistering a kmmio fault page has three steps:
 * 1. release_kmmio_fault_page()
 *    Disarm the page, wait a grace period to let all faults finish.
 * 2. remove_kmmio_fault_pages()
 *    Remove the pages from kmmio_page_table.
 * 3. rcu_free_kmmio_fault_pages()
 *    Actually free the kmmio_fault_page structs, after another RCU
 *    grace period.
 */
void unregister_kmmio_probe(struct kmmio_probe *p)
{
	unsigned long flags;
	unsigned long size = 0;
	unsigned long addr = p->addr & PAGE_MASK;
	const unsigned long size_lim = p->len + (p->addr & ~PAGE_MASK);
	struct kmmio_fault_page *release_list = NULL;
	struct kmmio_delayed_release *drelease;
	unsigned int l;
	pte_t *pte;

	pte = lookup_address(addr, &l);
	if (!pte)
		return;

	spin_lock_irqsave(&kmmio_lock, flags);
	while (size < size_lim) {
		release_kmmio_fault_page(addr + size, &release_list);
		size += page_level_size(l);
	}
	list_del_rcu(&p->list);
	kmmio_count--;
	spin_unlock_irqrestore(&kmmio_lock, flags);

	if (!release_list)
		return;

	drelease = kmalloc(sizeof(*drelease), GFP_ATOMIC);
	if (!drelease) {
		pr_crit("leaking kmmio_fault_page objects.\n");
		return;
	}
	drelease->release_list = release_list;

	/*
	 * This is not really RCU here. We have just disarmed a set of
	 * pages so that they cannot trigger page faults anymore. However,
	 * we cannot remove the pages from kmmio_page_table,
	 * because a probe hit might be in flight on another CPU. The
	 * pages are collected into a list, and they will be removed from
	 * kmmio_page_table when it is certain that no probe hit related to
	 * these pages can be in flight. An RCU grace period sounds like a
	 * good choice.
	 *
	 * If we removed the pages too early, the kmmio page fault handler
	 * might not find the respective kmmio_fault_page and would wrongly
	 * decide that the fault is not a kmmio fault, when it actually is.
	 * This would lead to madness.
	 */
	call_rcu(&drelease->rcu, remove_kmmio_fault_pages);
}
EXPORT_SYMBOL(unregister_kmmio_probe);

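/*
 * Die notifier: catch the single-step debug trap (DIE_DEBUG with DR_STEP
 * set in DR6) that follows the faulting access and hand it to
 * post_kmmio_handler().
 */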
static int
kmmio_die_notifier(struct notifier_block *nb, unsigned long val, void *args)
{
	struct die_args *arg = args;
	unsigned long *dr6_p = (unsigned long *)ERR_PTR(arg->err);

	if (val == DIE_DEBUG && (*dr6_p & DR_STEP))
		if (post_kmmio_handler(*dr6_p, arg->regs) == 1) {
			/*
			 * Reset the BS bit in dr6 (pointed by args->err) to
			 * denote completion of processing
			 */
			*dr6_p &= ~DR_STEP;
			return NOTIFY_STOP;
		}

	return NOTIFY_DONE;
}

static struct notifier_block nb_die = {
	.notifier_call = kmmio_die_notifier
};

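/*
 * Initialise the fault-page hash table and register the die notifier that
 * routes single-step debug traps to post_kmmio_handler().
 */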
int kmmio_init(void)
{
	int i;

	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++)
		INIT_LIST_HEAD(&kmmio_page_table[i]);

	return register_die_notifier(&nb_die);
}

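/*
 * Unregister the die notifier and warn if any fault pages are still hashed;
 * leftover entries mean any further tracing would leak memory.
 */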
void kmmio_cleanup(void)
{
	int i;

	unregister_die_notifier(&nb_die);
	for (i = 0; i < KMMIO_PAGE_TABLE_SIZE; i++) {
		WARN_ONCE(!list_empty(&kmmio_page_table[i]),
			KERN_ERR "kmmio_page_table not empty at cleanup, any further tracing will leak memory.\n");
	}
}