// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
 * Copyright (C) 2011 Don Zickus Red Hat, Inc.
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 */

/*
 * Handle hardware traps and faults.
 */
#include <linux/spinlock.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
#include <linux/sched/debug.h>
#include <linux/nmi.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/atomic.h>
#include <linux/sched/clock.h>

#include <asm/cpu_entry_area.h>
#include <asm/traps.h>
#include <asm/mach_traps.h>
#include <asm/nmi.h>
#include <asm/x86_init.h>
#include <asm/reboot.h>
#include <asm/cache.h>
#include <asm/nospec-branch.h>
#include <asm/sev-es.h>

#define CREATE_TRACE_POINTS
#include <trace/events/nmi.h>

struct nmi_desc {
	raw_spinlock_t lock;
	struct list_head head;
};

static struct nmi_desc nmi_desc[NMI_MAX] =
{
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[0].lock),
		.head = LIST_HEAD_INIT(nmi_desc[0].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[1].lock),
		.head = LIST_HEAD_INIT(nmi_desc[1].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[2].lock),
		.head = LIST_HEAD_INIT(nmi_desc[2].head),
	},
	{
		.lock = __RAW_SPIN_LOCK_UNLOCKED(&nmi_desc[3].lock),
		.head = LIST_HEAD_INIT(nmi_desc[3].head),
	},

};

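/*
 * Per-CPU accounting of how many NMIs of each class were handled on this
 * CPU: CPU-local (normal), unknown, external (SERR/IOCHK), and swallowed
 * back-to-back NMIs.
 */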
struct nmi_stats {
	unsigned int normal;
	unsigned int unknown;
	unsigned int external;
	unsigned int swallow;
};

static DEFINE_PER_CPU(struct nmi_stats, nmi_stats);

static int ignore_nmis __read_mostly;

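/*
 * When set, an NMI that no handler claims triggers a panic. Set via the
 * "unknown_nmi_panic" boot parameter handled below (and also writable
 * through the kernel.unknown_nmi_panic sysctl).
 */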
int unknown_nmi_panic;
/*
 * Prevent the NMI reason port (0x61) from being accessed simultaneously;
 * this lock may only be taken from NMI context.
 */
static DEFINE_RAW_SPINLOCK(nmi_reason_lock);

static int __init setup_unknown_nmi_panic(char *str)
{
	unknown_nmi_panic = 1;
	return 1;
}
__setup("unknown_nmi_panic", setup_unknown_nmi_panic);

#define nmi_to_desc(type) (&nmi_desc[type])

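/*
 * An NMI handler's runtime is only reported (by nmi_check_duration()
 * below) once it exceeds both this threshold and the handler's previous
 * maximum. The threshold defaults to 1 ms and is tunable at runtime via
 * the debugfs file created below, i.e. <debugfs>/x86/nmi_longest_ns.
 */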
static u64 nmi_longest_ns = 1 * NSEC_PER_MSEC;

static int __init nmi_warning_debugfs(void)
{
	debugfs_create_u64("nmi_longest_ns", 0644,
			arch_debugfs_dir, &nmi_longest_ns);
	return 0;
}
fs_initcall(nmi_warning_debugfs);

static void nmi_check_duration(struct nmiaction *action, u64 duration)
{
	int remainder_ns, decimal_msecs;

	if (duration < nmi_longest_ns || duration < action->max_duration)
		return;

	action->max_duration = duration;

	remainder_ns = do_div(duration, (1000 * 1000));
	decimal_msecs = remainder_ns / 1000;

	printk_ratelimited(KERN_INFO
		"INFO: NMI handler (%ps) took too long to run: %lld.%03d msecs\n",
		action->handler, duration, decimal_msecs);
}

static int nmi_handle(unsigned int type, struct pt_regs *regs)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *a;
	int handled = 0;

	rcu_read_lock();

	/*
	 * NMIs are edge-triggered, which means if you have enough
	 * of them concurrently, you can lose some because only one
	 * can be latched at any given time. Walk the whole list
	 * to handle those situations.
	 */
	list_for_each_entry_rcu(a, &desc->head, list) {
		int thishandled;
		u64 delta;

		delta = sched_clock();
		thishandled = a->handler(type, regs);
		handled += thishandled;
		delta = sched_clock() - delta;
		trace_nmi_handler(a->handler, (int)delta, thishandled);

		nmi_check_duration(a, delta);
	}

	rcu_read_unlock();

	/* return total number of NMI events handled */
	return handled;
}
NOKPROBE_SYMBOL(nmi_handle);

int __register_nmi_handler(unsigned int type, struct nmiaction *action)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	unsigned long flags;

	if (!action->handler)
		return -EINVAL;

	raw_spin_lock_irqsave(&desc->lock, flags);

	/*
	 * Indicate if there are multiple registrations on the
	 * internal NMI handler call chains (SERR and IO_CHECK).
	 */
	WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head));
	WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head));

	/*
	 * Some handlers need to be executed first, otherwise a fake
	 * event could confuse other handlers (kdump uses this flag).
	 */
	if (action->flags & NMI_FLAG_FIRST)
		list_add_rcu(&action->list, &desc->head);
	else
		list_add_tail_rcu(&action->list, &desc->head);

	raw_spin_unlock_irqrestore(&desc->lock, flags);
	return 0;
}
EXPORT_SYMBOL(__register_nmi_handler);

void unregister_nmi_handler(unsigned int type, const char *name)
{
	struct nmi_desc *desc = nmi_to_desc(type);
	struct nmiaction *n;
	unsigned long flags;

	raw_spin_lock_irqsave(&desc->lock, flags);

	list_for_each_entry_rcu(n, &desc->head, list) {
		/*
		 * The name passed in to describe the NMI handler
		 * is used as the lookup key.
		 */
		if (!strcmp(n->name, name)) {
			WARN(in_nmi(),
				"Trying to free NMI (%s) from NMI context!\n", n->name);
			list_del_rcu(&n->list);
			break;
		}
	}

	raw_spin_unlock_irqrestore(&desc->lock, flags);
	synchronize_rcu();
}
EXPORT_SYMBOL_GPL(unregister_nmi_handler);
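
/*
 * Illustrative sketch of how a client typically uses the API above (not
 * taken from this file; the handler body, mydrv_nmi_is_ours() and the
 * "mydrv" name are made up). A handler returns NMI_HANDLED when it has
 * claimed the NMI and NMI_DONE otherwise, and is registered through the
 * register_nmi_handler() macro from <asm/nmi.h>, which builds the
 * struct nmiaction and calls __register_nmi_handler():
 *
 *	static int mydrv_nmi_handler(unsigned int type, struct pt_regs *regs)
 *	{
 *		if (!mydrv_nmi_is_ours())	// hypothetical device check
 *			return NMI_DONE;
 *		// ... service the event ...
 *		return NMI_HANDLED;
 *	}
 *
 *	register_nmi_handler(NMI_LOCAL, mydrv_nmi_handler, 0, "mydrv");
 *	...
 *	unregister_nmi_handler(NMI_LOCAL, "mydrv");
 */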

static void
pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_SERR, regs))
		return;

	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	if (panic_on_unrecovered_nmi)
		nmi_panic(regs, "NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");

	/* Clear and disable the PCI SERR error line. */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
	outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(pci_serr_error);

static void
io_check_error(unsigned char reason, struct pt_regs *regs)
{
	unsigned long i;

	/* check to see if anyone registered against these types of errors */
	if (nmi_handle(NMI_IO_CHECK, regs))
		return;

	pr_emerg(
	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
		 reason, smp_processor_id());
	show_regs(regs);

	if (panic_on_io_nmi) {
		nmi_panic(regs, "NMI IOCK error: Not continuing");

		/*
		 * If we end up here, it means we have received an NMI while
		 * processing panic(). Simply return without delaying and
		 * re-enabling NMIs.
		 */
		return;
	}

	/* Re-enable the IOCK line, wait for a few seconds */
	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);

	i = 20000;
	while (--i) {
		touch_nmi_watchdog();
		udelay(100);
	}

	reason &= ~NMI_REASON_CLEAR_IOCHK;
	outb(reason, NMI_REASON_PORT);
}
NOKPROBE_SYMBOL(io_check_error);

static void
unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
{
	int handled;

	/*
	 * Use 'false' as back-to-back NMIs are dealt with one level up.
	 * Of course this makes having multiple 'unknown' handlers useless
	 * as only the first one is ever run (unless it can actually determine
	 * if it caused the NMI)
	 */
	handled = nmi_handle(NMI_UNKNOWN, regs);
	if (handled) {
		__this_cpu_add(nmi_stats.unknown, handled);
		return;
	}

	__this_cpu_add(nmi_stats.unknown, 1);

	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
		 reason, smp_processor_id());

	pr_emerg("Do you have a strange power saving mode enabled?\n");
	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
		nmi_panic(regs, "NMI: Not continuing");

	pr_emerg("Dazed and confused, but trying to continue\n");
}
NOKPROBE_SYMBOL(unknown_nmi_error);

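/*
 * Back-to-back NMI detection state: last_nmi_rip records the instruction
 * pointer at which the previous NMI interrupted this CPU, and swallow_nmi
 * is set when a handler reported more than one event, so that a subsequent
 * "unknown" NMI hitting the same RIP can be silently swallowed.
 */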
static DEFINE_PER_CPU(bool, swallow_nmi);
static DEFINE_PER_CPU(unsigned long, last_nmi_rip);

static noinstr void default_do_nmi(struct pt_regs *regs)
{
	unsigned char reason = 0;
	int handled;
	bool b2b = false;

	/*
	 * CPU-specific NMI must be processed before non-CPU-specific
	 * NMI, otherwise we may lose it, because the CPU-specific
	 * NMI can not be detected/processed on other CPUs.
	 */

	/*
	 * Back-to-back NMIs are interesting because they can either
	 * be two NMIs or more than two NMIs (anything over two is dropped
	 * due to NMI being edge-triggered). If this is the second half
	 * of a back-to-back NMI, assume we dropped things and process
	 * more handlers. Otherwise reset the 'swallow' NMI behaviour.
	 */
	if (regs->ip == __this_cpu_read(last_nmi_rip))
		b2b = true;
	else
		__this_cpu_write(swallow_nmi, false);

	__this_cpu_write(last_nmi_rip, regs->ip);

	instrumentation_begin();

	handled = nmi_handle(NMI_LOCAL, regs);
	__this_cpu_add(nmi_stats.normal, handled);
	if (handled) {
		/*
		 * There are cases when an NMI handler handles multiple
		 * events in the current NMI. One of these events may
		 * be queued for the next NMI. Because the event is
		 * already handled, the next NMI will result in an unknown
		 * NMI. Instead, let's flag this for a potential NMI to
		 * swallow.
		 */
		if (handled > 1)
			__this_cpu_write(swallow_nmi, true);
		goto out;
	}

	/*
	 * Non-CPU-specific NMI: NMI sources can be processed on any CPU.
	 *
	 * Another CPU may be processing panic routines while holding
	 * nmi_reason_lock. Check if the CPU issued the IPI for crash dumping,
	 * and if so, call its callback directly. If there is no CPU preparing
	 * crash dump, we simply loop here.
	 */
	while (!raw_spin_trylock(&nmi_reason_lock)) {
		run_crash_ipi_callback(regs);
		cpu_relax();
	}

	reason = x86_platform.get_nmi_reason();

	if (reason & NMI_REASON_MASK) {
		if (reason & NMI_REASON_SERR)
			pci_serr_error(reason, regs);
		else if (reason & NMI_REASON_IOCHK)
			io_check_error(reason, regs);
#ifdef CONFIG_X86_32
		/*
		 * Reassert NMI in case it became active
		 * meanwhile as it's edge-triggered:
		 */
		reassert_nmi();
#endif
		__this_cpu_add(nmi_stats.external, 1);
		raw_spin_unlock(&nmi_reason_lock);
		goto out;
	}
	raw_spin_unlock(&nmi_reason_lock);

	/*
	 * Only one NMI can be latched at a time. To handle
	 * this we may process multiple nmi handlers at once to
	 * cover the case where an NMI is dropped. The downside
	 * to this approach is we may process an NMI prematurely,
	 * while its real NMI is sitting latched. This will cause
	 * an unknown NMI on the next run of the NMI processing.
	 *
	 * We tried to flag that condition above, by setting the
	 * swallow_nmi flag when we process more than one event.
	 * This condition is also only present on the second half
	 * of a back-to-back NMI, so we flag that condition too.
	 *
	 * If both are true, we assume we already processed this
	 * NMI previously and we swallow it. Otherwise we reset
	 * the logic.
	 *
	 * There are scenarios where we may accidentally swallow
	 * a 'real' unknown NMI. For example, while processing
	 * a perf NMI another perf NMI comes in along with a
	 * 'real' unknown NMI. These two NMIs get combined into
	 * one (as described above). When the next NMI gets
	 * processed, it will be flagged by perf as handled, but
	 * no one will know that there was a 'real' unknown NMI sent
	 * also. As a result it gets swallowed. Or if the first
	 * perf NMI returns two events handled then the second
	 * NMI will get eaten by the logic below, again losing a
	 * 'real' unknown NMI. But this is the best we can do
	 * for now.
	 */
	if (b2b && __this_cpu_read(swallow_nmi))
		__this_cpu_add(nmi_stats.swallow, 1);
	else
		unknown_nmi_error(reason, regs);

out:
	instrumentation_end();
}

/*
 * NMIs can page fault or hit breakpoints which will cause them to lose
 * their NMI context with the CPU when the breakpoint or page fault does an IRET.
 *
 * As a result, NMIs can nest if NMIs get unmasked due to an IRET during
 * NMI processing. On x86_64, the asm glue protects us from nested NMIs
 * if the outer NMI came from kernel mode, but we can still nest if the
 * outer NMI came from user mode.
 *
 * To handle these nested NMIs, we have three states:
 *
 *  1) not running
 *  2) executing
 *  3) latched
 *
 * When no NMI is in progress, it is in the "not running" state.
 * When an NMI comes in, it goes into the "executing" state.
 * Normally, if another NMI is triggered, it does not interrupt
 * the running NMI and the HW will simply latch it so that when
 * the first NMI finishes, it will restart the second NMI.
 * (Note, the latch is binary, thus multiple NMIs triggering,
 *  when one is running, are ignored. Only one NMI is restarted.)
 *
 * If an NMI executes an iret, another NMI can preempt it. We do not
 * want to allow this new NMI to run, but we want to execute it when the
 * first one finishes. We set the state to "latched", and the exit of
 * the first NMI will perform a dec_return; if the result is zero
 * (NOT_RUNNING), then it will simply exit the NMI handler. If not, the
 * dec_return would have set the state to NMI_EXECUTING (what we want it
 * to be when we are running). In this case, we simply jump back to
 * rerun the NMI handler again, and restart the 'latched' NMI.
 *
 * No trap (breakpoint or page fault) should be hit before nmi_restart,
 * thus there is no race between the first check of state for NOT_RUNNING
 * and setting it to NMI_EXECUTING. The HW will prevent nested NMIs
 * at this point.
 *
 * In case the NMI takes a page fault, we need to save off the CR2
 * because the NMI could have preempted another page fault and corrupted
 * the CR2 that is about to be read. As nested NMIs must be restarted
 * and they cannot take breakpoints or page faults, the update of the
 * CR2 must be done before converting the NMI state back to NOT_RUNNING.
 * Otherwise, there would be a race of another nested NMI coming in
 * after setting state to NOT_RUNNING but before updating nmi_cr2.
 */
enum nmi_states {
	NMI_NOT_RUNNING = 0,
	NMI_EXECUTING,
	NMI_LATCHED,
};
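
/*
 * Per-CPU NMI nesting state, plus saved copies of CR2 and DR7: CR2 is
 * restored in case a page fault taken from NMI context clobbered the
 * value the interrupted context was about to read, and DR7 is saved by
 * local_db_save() so hardware breakpoints stay disabled across the
 * handler and are restored on exit.
 */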
static DEFINE_PER_CPU(enum nmi_states, nmi_state);
static DEFINE_PER_CPU(unsigned long, nmi_cr2);
static DEFINE_PER_CPU(unsigned long, nmi_dr7);

DEFINE_IDTENTRY_RAW(exc_nmi)
{
	irqentry_state_t irq_state;

	/*
	 * Re-enable NMIs right here when running as an SEV-ES guest. This might
	 * cause nested NMIs, but those can be handled safely.
	 */
	sev_es_nmi_complete();

	if (IS_ENABLED(CONFIG_SMP) && arch_cpu_is_offline(smp_processor_id()))
		return;

	if (this_cpu_read(nmi_state) != NMI_NOT_RUNNING) {
		this_cpu_write(nmi_state, NMI_LATCHED);
		return;
	}
	this_cpu_write(nmi_state, NMI_EXECUTING);
	this_cpu_write(nmi_cr2, read_cr2());
nmi_restart:

	/*
	 * Needs to happen before DR7 is accessed, because the hypervisor can
	 * intercept DR7 reads/writes, turning those into #VC exceptions.
	 */
	sev_es_ist_enter(regs);

	this_cpu_write(nmi_dr7, local_db_save());

	irq_state = irqentry_nmi_enter(regs);

	inc_irq_stat(__nmi_count);

	if (!ignore_nmis)
		default_do_nmi(regs);

	irqentry_nmi_exit(regs, irq_state);

	local_db_restore(this_cpu_read(nmi_dr7));

	sev_es_ist_exit();

	if (unlikely(this_cpu_read(nmi_cr2) != read_cr2()))
		write_cr2(this_cpu_read(nmi_cr2));
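	/*
	 * If a nested NMI arrived while this one ran, the nested entry set
	 * nmi_state to NMI_LATCHED; the decrement then leaves NMI_EXECUTING
	 * (non-zero) and we loop to service it. Otherwise the decrement
	 * drops the state to NMI_NOT_RUNNING and we are done.
	 */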
	if (this_cpu_dec_return(nmi_state))
		goto nmi_restart;

	if (user_mode(regs))
		mds_user_clear_cpu_buffers();
}

#if defined(CONFIG_X86_64) && IS_ENABLED(CONFIG_KVM_INTEL)
DEFINE_IDTENTRY_RAW(exc_nmi_noist)
{
	exc_nmi(regs);
}
#endif
#if IS_MODULE(CONFIG_KVM_INTEL)
EXPORT_SYMBOL_GPL(asm_exc_nmi_noist);
#endif

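/*
 * Temporarily suppress NMI handling: while ignore_nmis is non-zero,
 * exc_nmi() still runs but skips default_do_nmi(), so registered
 * handlers are not called. Calls nest via the counter.
 */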
void stop_nmi(void)
{
	ignore_nmis++;
}

void restart_nmi(void)
{
	ignore_nmis--;
}

/* reset the back-to-back NMI logic */
void local_touch_nmi(void)
{
	__this_cpu_write(last_nmi_rip, 0);
}
EXPORT_SYMBOL_GPL(local_touch_nmi);