// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * KVM paravirt_ops implementation
 *
 * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 * Copyright IBM Corporation, 2007
 * Authors: Anthony Liguori <aliguori@us.ibm.com>
 */

#define pr_fmt(fmt) "kvm-guest: " fmt

#include <linux/context_tracking.h>
#include <linux/init.h>
#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/kvm_para.h>
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hardirq.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/hash.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/kprobes.h>
#include <linux/nmi.h>
#include <linux/swait.h>
#include <linux/syscore_ops.h>
#include <asm/timer.h>
#include <asm/cpu.h>
#include <asm/traps.h>
#include <asm/desc.h>
#include <asm/tlbflush.h>
#include <asm/apic.h>
#include <asm/apicdef.h>
#include <asm/hypervisor.h>
#include <asm/tlb.h>
#include <asm/cpuidle_haltpoll.h>
#include <asm/ptrace.h>
#include <asm/reboot.h>
#include <asm/svm.h>

DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);

static int kvmapf = 1;

static int __init parse_no_kvmapf(char *arg)
{
	kvmapf = 0;
	return 0;
}

early_param("no-kvmapf", parse_no_kvmapf);

static int steal_acc = 1;
static int __init parse_no_stealacc(char *arg)
{
	steal_acc = 0;
	return 0;
}

early_param("no-steal-acc", parse_no_stealacc);

static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible;
static int has_steal_clock = 0;

/*
 * No need for any "IO delay" on KVM
 */
static void kvm_io_delay(void)
{
}

#define KVM_TASK_SLEEP_HASHBITS 8
#define KVM_TASK_SLEEP_HASHSIZE (1<<KVM_TASK_SLEEP_HASHBITS)

struct kvm_task_sleep_node {
	struct hlist_node link;
	struct swait_queue_head wq;
	u32 token;
	int cpu;
};

static struct kvm_task_sleep_head {
	raw_spinlock_t lock;
	struct hlist_head list;
} async_pf_sleepers[KVM_TASK_SLEEP_HASHSIZE];

static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b,
						  u32 token)
{
	struct hlist_node *p;

	hlist_for_each(p, &b->list) {
		struct kvm_task_sleep_node *n =
			hlist_entry(p, typeof(*n), link);
		if (n->token == token)
			return n;
	}

	return NULL;
}

static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n)
{
	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
	struct kvm_task_sleep_node *e;

	raw_spin_lock(&b->lock);
	e = _find_apf_task(b, token);
	if (e) {
		/* dummy entry exists -> wake up was delivered ahead of PF */
		hlist_del(&e->link);
		raw_spin_unlock(&b->lock);
		kfree(e);
		return false;
	}

	n->token = token;
	n->cpu = smp_processor_id();
	init_swait_queue_head(&n->wq);
	hlist_add_head(&n->link, &b->list);
	raw_spin_unlock(&b->lock);
	return true;
}
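
/*
 * Illustrative sketch of the race the dummy entry resolves (derived from the
 * code above, not a normative description): for a given token two orderings
 * are possible.
 *
 *   "Page not present" #PF first (common case):
 *	kvm_async_pf_queue_task() finds no node, queues the caller's node and
 *	returns true; the task sleeps until the later "page ready"
 *	notification finds that node and wakes it.
 *
 *   "Page ready" first (the fault was resolved before the task queued itself):
 *	kvm_async_pf_task_wake() finds no node and inserts a dummy one;
 *	kvm_async_pf_queue_task() then finds the dummy, frees it and returns
 *	false, so the task never sleeps.
 */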

/*
 * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled
 * @token: Token to identify the sleep node entry
 *
 * Invoked from the async pagefault handling code or from the VM exit page
 * fault handler. In both cases RCU is watching.
 */
void kvm_async_pf_task_wait_schedule(u32 token)
{
	struct kvm_task_sleep_node n;
	DECLARE_SWAITQUEUE(wait);

	lockdep_assert_irqs_disabled();

	if (!kvm_async_pf_queue_task(token, &n))
		return;

	for (;;) {
		prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE);
		if (hlist_unhashed(&n.link))
			break;

		local_irq_enable();
		schedule();
		local_irq_disable();
	}
	finish_swait(&n.wq, &wait);
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule);

static void apf_task_wake_one(struct kvm_task_sleep_node *n)
{
	hlist_del_init(&n->link);
	if (swq_has_sleeper(&n->wq))
		swake_up_one(&n->wq);
}

static void apf_task_wake_all(void)
{
	int i;

	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) {
		struct kvm_task_sleep_head *b = &async_pf_sleepers[i];
		struct kvm_task_sleep_node *n;
		struct hlist_node *p, *next;

		raw_spin_lock(&b->lock);
		hlist_for_each_safe(p, next, &b->list) {
			n = hlist_entry(p, typeof(*n), link);
			if (n->cpu == smp_processor_id())
				apf_task_wake_one(n);
		}
		raw_spin_unlock(&b->lock);
	}
}

void kvm_async_pf_task_wake(u32 token)
{
	u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS);
	struct kvm_task_sleep_head *b = &async_pf_sleepers[key];
	struct kvm_task_sleep_node *n;

	if (token == ~0) {
		apf_task_wake_all();
		return;
	}

again:
	raw_spin_lock(&b->lock);
	n = _find_apf_task(b, token);
	if (!n) {
		/*
		 * async PF was not yet handled.
		 * Add dummy entry for the token.
		 */
		n = kzalloc(sizeof(*n), GFP_ATOMIC);
		if (!n) {
			/*
			 * Allocation failed! Busy wait while other cpu
			 * handles async PF.
			 */
			raw_spin_unlock(&b->lock);
			cpu_relax();
			goto again;
		}
		n->token = token;
		n->cpu = smp_processor_id();
		init_swait_queue_head(&n->wq);
		hlist_add_head(&n->link, &b->list);
	} else {
		apf_task_wake_one(n);
	}
	raw_spin_unlock(&b->lock);
	return;
}
EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake);

noinstr u32 kvm_read_and_reset_apf_flags(void)
{
	u32 flags = 0;

	if (__this_cpu_read(apf_reason.enabled)) {
		flags = __this_cpu_read(apf_reason.flags);
		__this_cpu_write(apf_reason.flags, 0);
	}

	return flags;
}
EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags);

noinstr bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token)
{
	u32 flags = kvm_read_and_reset_apf_flags();
	irqentry_state_t state;

	if (!flags)
		return false;

	state = irqentry_enter(regs);
	instrumentation_begin();

	/*
	 * If the host managed to inject an async #PF into an interrupt
	 * disabled region, then die hard as this is not going to end well
	 * and the host side is seriously broken.
	 */
	if (unlikely(!(regs->flags & X86_EFLAGS_IF)))
		panic("Host injected async #PF in interrupt disabled region\n");

	if (flags & KVM_PV_REASON_PAGE_NOT_PRESENT) {
		if (unlikely(!(user_mode(regs))))
			panic("Host injected async #PF in kernel mode\n");
		/* Page is swapped out by the host. */
		kvm_async_pf_task_wait_schedule(token);
	} else {
		WARN_ONCE(1, "Unexpected async PF flags: %x\n", flags);
	}

	instrumentation_end();
	irqentry_exit(regs, state);
	return true;
}

DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt)
{
	struct pt_regs *old_regs = set_irq_regs(regs);
	u32 token;

	ack_APIC_irq();

	inc_irq_stat(irq_hv_callback_count);

	if (__this_cpu_read(apf_reason.enabled)) {
		token = __this_cpu_read(apf_reason.token);
		kvm_async_pf_task_wake(token);
		__this_cpu_write(apf_reason.token, 0);
		wrmsrl(MSR_KVM_ASYNC_PF_ACK, 1);
	}

	set_irq_regs(old_regs);
}
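
/*
 * Rough sketch of the "page ready" flow, assuming the documented async PF MSR
 * protocol: the host writes the token of a completed async page fault into
 * apf_reason.token and raises HYPERVISOR_CALLBACK_VECTOR. The handler above
 * wakes the matching sleeper, clears the token and writes 1 to
 * MSR_KVM_ASYNC_PF_ACK so that the host may deliver the next notification.
 */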

static void __init paravirt_ops_setup(void)
{
	pv_info.name = "KVM";

	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
		pv_ops.cpu.io_delay = kvm_io_delay;

#ifdef CONFIG_X86_IO_APIC
	no_timer_check = 1;
#endif
}

static void kvm_register_steal_time(void)
{
	int cpu = smp_processor_id();
	struct kvm_steal_time *st = &per_cpu(steal_time, cpu);

	if (!has_steal_clock)
		return;

	wrmsrl(MSR_KVM_STEAL_TIME, (slow_virt_to_phys(st) | KVM_MSR_ENABLED));
	pr_info("stealtime: cpu %d, msr %llx\n", cpu,
		(unsigned long long) slow_virt_to_phys(st));
}
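
/*
 * Illustrative note on the MSR value above: MSR_KVM_STEAL_TIME carries the
 * guest physical address of the per-CPU kvm_steal_time area with bit 0 used
 * as the enable flag (KVM_MSR_ENABLED). The structure is 64-byte aligned
 * (see the __aligned(64) annotation above), so the low address bits are
 * guaranteed to be free; e.g. a hypothetical GPA of 0x3f401240 would be
 * registered as 0x3f401241.
 */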

static DEFINE_PER_CPU_DECRYPTED(unsigned long, kvm_apic_eoi) = KVM_PV_EOI_DISABLED;

static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val)
{
	/*
	 * This relies on __test_and_clear_bit to modify the memory
	 * in a way that is atomic with respect to the local CPU.
	 * The hypervisor only accesses this memory from the local CPU so
	 * there's no need for lock or memory barriers.
	 * An optimization barrier is implied in apic write.
	 */
	if (__test_and_clear_bit(KVM_PV_EOI_BIT, this_cpu_ptr(&kvm_apic_eoi)))
		return;
	apic->native_eoi_write(APIC_EOI, APIC_EOI_ACK);
}
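
/*
 * Hedged summary of the PV EOI handshake: when KVM injects an interrupt for
 * which the EOI write can be skipped, it sets KVM_PV_EOI_BIT in the shared
 * kvm_apic_eoi word registered via MSR_KVM_PV_EOI_EN. If the guest finds the
 * bit set here, clearing it is enough to acknowledge the interrupt and the
 * APIC EOI write is avoided; otherwise the EOI is written to the (virtual)
 * APIC as usual.
 */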

static void kvm_guest_cpu_init(void)
{
	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
		u64 pa;

		WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled));

		pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason));
		pa |= KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT;

		if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT))
			pa |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;

		wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR);

		wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
		__this_cpu_write(apf_reason.enabled, 1);
		pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
	}

	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
		unsigned long pa;

		/* Size alignment is implied but just to make it explicit. */
		BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4);
		__this_cpu_write(kvm_apic_eoi, 0);
		pa = slow_virt_to_phys(this_cpu_ptr(&kvm_apic_eoi))
			| KVM_MSR_ENABLED;
		wrmsrl(MSR_KVM_PV_EOI_EN, pa);
	}

	if (has_steal_clock)
		kvm_register_steal_time();
}
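
/*
 * Illustrative layout of the MSR_KVM_ASYNC_PF_EN value composed above,
 * assuming the async PF MSR protocol: the 64-byte aligned GPA of this CPU's
 * apf_reason structure is combined with control bits in the low bits left
 * free by the alignment, roughly:
 *
 *	bit 0: KVM_ASYNC_PF_ENABLED
 *	bit 1: KVM_ASYNC_PF_SEND_ALWAYS (not used here)
 *	bit 2: KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT
 *	bit 3: KVM_ASYNC_PF_DELIVERY_AS_INT
 *
 * MSR_KVM_ASYNC_PF_INT separately selects the vector used for "page ready"
 * notifications (HYPERVISOR_CALLBACK_VECTOR above).
 */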

static void kvm_pv_disable_apf(void)
{
	if (!__this_cpu_read(apf_reason.enabled))
		return;

	wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
	__this_cpu_write(apf_reason.enabled, 0);

	pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
}

static void kvm_disable_steal_time(void)
{
	if (!has_steal_clock)
		return;

	wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
}

static void kvm_pv_guest_cpu_reboot(void *unused)
{
	/*
	 * We disable PV EOI before we load a new kernel by kexec,
	 * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
	 * New kernel can re-enable when it boots.
	 */
	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
	kvm_pv_disable_apf();
	kvm_disable_steal_time();
}

static int kvm_pv_reboot_notify(struct notifier_block *nb,
				unsigned long code, void *unused)
{
	if (code == SYS_RESTART)
		on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
	return NOTIFY_DONE;
}

static struct notifier_block kvm_pv_reboot_nb = {
	.notifier_call = kvm_pv_reboot_notify,
};

static u64 kvm_steal_clock(int cpu)
{
	u64 steal;
	struct kvm_steal_time *src;
	int version;

	src = &per_cpu(steal_time, cpu);
	do {
		version = src->version;
		virt_rmb();
		steal = src->steal;
		virt_rmb();
	} while ((version & 1) || (version != src->version));

	return steal;
}
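
/*
 * Worked example of the version protocol used above, assuming the usual
 * even/odd convention: the host bumps ->version to an odd value before it
 * updates ->steal and to the next even value afterwards. A reader that sees
 * an odd version, or a version that changed across the reads, retries:
 *
 *	version == 6, still 6 after reading ->steal  -> snapshot is consistent
 *	version == 7                                 -> update in progress, retry
 *	version 6 before, 8 after                    -> update was missed, retry
 */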

static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
{
	early_set_memory_decrypted((unsigned long) ptr, size);
}

/*
 * Iterate through all possible CPUs and map the memory region pointed
 * to by apf_reason, steal_time and kvm_apic_eoi as decrypted at once.
 *
 * Note: we iterate through all possible CPUs to ensure that hotplugged
 * CPUs will have their per-cpu variables already mapped as
 * decrypted.
 */
static void __init sev_map_percpu_data(void)
{
	int cpu;

	if (!sev_active())
		return;

	for_each_possible_cpu(cpu) {
		__set_percpu_decrypted(&per_cpu(apf_reason, cpu), sizeof(apf_reason));
		__set_percpu_decrypted(&per_cpu(steal_time, cpu), sizeof(steal_time));
		__set_percpu_decrypted(&per_cpu(kvm_apic_eoi, cpu), sizeof(kvm_apic_eoi));
	}
}

static bool pv_tlb_flush_supported(void)
{
	return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
		!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
		kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
}

static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);

static void kvm_guest_cpu_offline(bool shutdown)
{
	kvm_disable_steal_time();
	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		wrmsrl(MSR_KVM_PV_EOI_EN, 0);
	kvm_pv_disable_apf();
	if (!shutdown)
		apf_task_wake_all();
	kvmclock_disable();
}

static int kvm_cpu_online(unsigned int cpu)
{
	unsigned long flags;

	local_irq_save(flags);
	kvm_guest_cpu_init();
	local_irq_restore(flags);
	return 0;
}

#ifdef CONFIG_SMP

static bool pv_ipi_supported(void)
{
	return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
}

static bool pv_sched_yield_supported(void)
{
	return (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
		!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
		kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
}

#define KVM_IPI_CLUSTER_SIZE (2 * BITS_PER_LONG)

static void __send_ipi_mask(const struct cpumask *mask, int vector)
{
	unsigned long flags;
	int cpu, apic_id, icr;
	int min = 0, max = 0;
#ifdef CONFIG_X86_64
	__uint128_t ipi_bitmap = 0;
#else
	u64 ipi_bitmap = 0;
#endif
	long ret;

	if (cpumask_empty(mask))
		return;

	local_irq_save(flags);

	switch (vector) {
	default:
		icr = APIC_DM_FIXED | vector;
		break;
	case NMI_VECTOR:
		icr = APIC_DM_NMI;
		break;
	}

	for_each_cpu(cpu, mask) {
		apic_id = per_cpu(x86_cpu_to_apicid, cpu);
		if (!ipi_bitmap) {
			min = max = apic_id;
		} else if (apic_id < min && max - apic_id < KVM_IPI_CLUSTER_SIZE) {
			ipi_bitmap <<= min - apic_id;
			min = apic_id;
		} else if (apic_id > min && apic_id < min + KVM_IPI_CLUSTER_SIZE) {
			max = apic_id < max ? max : apic_id;
		} else {
			ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
				(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
			WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
				  ret);
			min = max = apic_id;
			ipi_bitmap = 0;
		}
		__set_bit(apic_id - min, (unsigned long *)&ipi_bitmap);
	}

	if (ipi_bitmap) {
		ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
			(unsigned long)(ipi_bitmap >> BITS_PER_LONG), min, icr);
		WARN_ONCE(ret < 0, "kvm-guest: failed to send PV IPI: %ld",
			  ret);
	}

	local_irq_restore(flags);
}
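
/*
 * Worked example of the bitmap packing above, with hypothetical APIC IDs:
 * with BITS_PER_LONG == 64 the bitmap covers at most 2 * 64 = 128 APIC IDs
 * starting at 'min'. For destination APIC IDs 4, 7 and 70 the loop sets bits
 * 0, 3 and 66 with min = 4, and a single hypercall passes the low and high
 * 64-bit halves of the bitmap, the base APIC ID (4) and the ICR value. An ID
 * outside the 128-wide window flushes the current bitmap with one hypercall
 * and starts a new one.
 */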

static void kvm_send_ipi_mask(const struct cpumask *mask, int vector)
{
	__send_ipi_mask(mask, vector);
}

static void kvm_send_ipi_mask_allbutself(const struct cpumask *mask, int vector)
{
	unsigned int this_cpu = smp_processor_id();
	struct cpumask *new_mask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
	const struct cpumask *local_mask;

	cpumask_copy(new_mask, mask);
	cpumask_clear_cpu(this_cpu, new_mask);
	local_mask = new_mask;
	__send_ipi_mask(local_mask, vector);
}

/*
 * Set the IPI entry points
 */
static void kvm_setup_pv_ipi(void)
{
	apic->send_IPI_mask = kvm_send_ipi_mask;
	apic->send_IPI_mask_allbutself = kvm_send_ipi_mask_allbutself;
	pr_info("setup PV IPIs\n");
}

static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
{
	int cpu;

	native_send_call_func_ipi(mask);

	/* Make sure other vCPUs get a chance to run if they need to. */
	for_each_cpu(cpu, mask) {
		if (vcpu_is_preempted(cpu)) {
			kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
			break;
		}
	}
}

static void __init kvm_smp_prepare_boot_cpu(void)
{
	/*
	 * Map the per-cpu variables as decrypted before kvm_guest_cpu_init()
	 * shares the guest physical address with the hypervisor.
	 */
	sev_map_percpu_data();

	kvm_guest_cpu_init();
	native_smp_prepare_boot_cpu();
	kvm_spinlock_init();
}

static int kvm_cpu_down_prepare(unsigned int cpu)
{
	unsigned long flags;

	local_irq_save(flags);
	kvm_guest_cpu_offline(false);
	local_irq_restore(flags);
	return 0;
}

#endif

static int kvm_suspend(void)
{
	kvm_guest_cpu_offline(false);

	return 0;
}

static void kvm_resume(void)
{
	kvm_cpu_online(raw_smp_processor_id());
}

static struct syscore_ops kvm_syscore_ops = {
	.suspend = kvm_suspend,
	.resume = kvm_resume,
};

/*
 * After a PV feature is registered, the host will keep writing to the
 * registered memory location. If the guest happens to shut down, this memory
 * won't be valid. In cases like kexec, where a new kernel is installed, this
 * means the host keeps writing to what is now a random memory location.
 */
#ifdef CONFIG_KEXEC_CORE
static void kvm_crash_shutdown(struct pt_regs *regs)
{
	kvm_guest_cpu_offline(true);
	native_machine_crash_shutdown(regs);
}
#endif

static void kvm_flush_tlb_others(const struct cpumask *cpumask,
				 const struct flush_tlb_info *info)
{
	u8 state;
	int cpu;
	struct kvm_steal_time *src;
	struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);

	cpumask_copy(flushmask, cpumask);
	/*
	 * We have to flush only online vCPUs, and queue
	 * flush_on_enter for preempted vCPUs.
	 */
	for_each_cpu(cpu, flushmask) {
		src = &per_cpu(steal_time, cpu);
		state = READ_ONCE(src->preempted);
		if ((state & KVM_VCPU_PREEMPTED)) {
			if (try_cmpxchg(&src->preempted, &state,
					state | KVM_VCPU_FLUSH_TLB))
				__cpumask_clear_cpu(cpu, flushmask);
		}
	}

	native_flush_tlb_others(flushmask, info);
}
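
/*
 * Sketch of the PV TLB flush contract implied by the loop above: for a vCPU
 * the host reports as preempted, setting KVM_VCPU_FLUSH_TLB in the shared
 * steal_time.preempted byte replaces the flush IPI, and KVM flushes that
 * vCPU's TLB before it runs again. The try_cmpxchg() guards against the vCPU
 * becoming runnable concurrently: if ->preempted changed under us, the CPU
 * stays in flushmask and is flushed by the native_flush_tlb_others() call.
 */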

static void __init kvm_guest_init(void)
{
	int i;

	paravirt_ops_setup();
	register_reboot_notifier(&kvm_pv_reboot_nb);
	for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++)
		raw_spin_lock_init(&async_pf_sleepers[i].lock);

	if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
		has_steal_clock = 1;
		pv_ops.time.steal_clock = kvm_steal_clock;
	}

	if (pv_tlb_flush_supported()) {
		pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
		pv_ops.mmu.tlb_remove_table = tlb_remove_table;
		pr_info("KVM setup pv remote TLB flush\n");
	}

	if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
		apic_set_eoi_write(kvm_guest_apic_eoi_write);

	if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_INT) && kvmapf) {
		static_branch_enable(&kvm_async_pf_enabled);
		alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR, asm_sysvec_kvm_asyncpf_interrupt);
	}

#ifdef CONFIG_SMP
	smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
	if (pv_sched_yield_supported()) {
		smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
		pr_info("setup PV sched yield\n");
	}
	if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
				      kvm_cpu_online, kvm_cpu_down_prepare) < 0)
		pr_err("failed to install cpu hotplug callbacks\n");
#else
	sev_map_percpu_data();
	kvm_guest_cpu_init();
#endif

#ifdef CONFIG_KEXEC_CORE
	machine_ops.crash_shutdown = kvm_crash_shutdown;
#endif

	register_syscore_ops(&kvm_syscore_ops);

	/*
	 * Hard lockup detection is enabled by default. Disable it, as guests
	 * can get false positives too easily, for example if the host is
	 * overcommitted.
	 */
	hardlockup_detector_disable();
}

static noinline uint32_t __kvm_cpuid_base(void)
{
	if (boot_cpu_data.cpuid_level < 0)
		return 0;	/* So we don't blow up on old processors */

	if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
		return hypervisor_cpuid_base("KVMKVMKVM\0\0\0", 0);

	return 0;
}

static inline uint32_t kvm_cpuid_base(void)
{
	static int kvm_cpuid_base = -1;

	if (kvm_cpuid_base == -1)
		kvm_cpuid_base = __kvm_cpuid_base();

	return kvm_cpuid_base;
}
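
/*
 * Hedged summary of the detection above: hypervisor CPUID leaves live in the
 * 0x40000000 range, and hypervisor_cpuid_base() scans that range for a leaf
 * whose EBX/ECX/EDX spell "KVMKVMKVM\0\0\0", returning its base (0 if not
 * found). The helpers below then query the KVM feature and hint words by
 * OR-ing the detected base with KVM_CPUID_FEATURES and reading EAX/EDX of
 * that leaf.
 */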

bool kvm_para_available(void)
{
	return kvm_cpuid_base() != 0;
}
EXPORT_SYMBOL_GPL(kvm_para_available);

unsigned int kvm_arch_para_features(void)
{
	return cpuid_eax(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}

unsigned int kvm_arch_para_hints(void)
{
	return cpuid_edx(kvm_cpuid_base() | KVM_CPUID_FEATURES);
}
EXPORT_SYMBOL_GPL(kvm_arch_para_hints);

static uint32_t __init kvm_detect(void)
{
	return kvm_cpuid_base();
}

static void __init kvm_apic_init(void)
{
#if defined(CONFIG_SMP)
	if (pv_ipi_supported())
		kvm_setup_pv_ipi();
#endif
}

static void __init kvm_init_platform(void)
{
	kvmclock_init();
	x86_platform.apic_post_init = kvm_apic_init;
}

#if defined(CONFIG_AMD_MEM_ENCRYPT)
static void kvm_sev_es_hcall_prepare(struct ghcb *ghcb, struct pt_regs *regs)
{
	/* RAX and CPL are already in the GHCB */
	ghcb_set_rbx(ghcb, regs->bx);
	ghcb_set_rcx(ghcb, regs->cx);
	ghcb_set_rdx(ghcb, regs->dx);
	ghcb_set_rsi(ghcb, regs->si);
}

static bool kvm_sev_es_hcall_finish(struct ghcb *ghcb, struct pt_regs *regs)
{
	/* No checking of the return state needed */
	return true;
}
#endif

const __initconst struct hypervisor_x86 x86_hyper_kvm = {
	.name = "KVM",
	.detect = kvm_detect,
	.type = X86_HYPER_KVM,
	.init.guest_late_init = kvm_guest_init,
	.init.x2apic_available = kvm_para_available,
	.init.init_platform = kvm_init_platform,
#if defined(CONFIG_AMD_MEM_ENCRYPT)
	.runtime.sev_es_hcall_prepare = kvm_sev_es_hcall_prepare,
	.runtime.sev_es_hcall_finish = kvm_sev_es_hcall_finish,
#endif
};

static __init int activate_jump_labels(void)
{
	if (has_steal_clock) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) static_key_slow_inc(¶virt_steal_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) if (steal_acc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) 			static_key_slow_inc(&paravirt_steal_rq_enabled);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) arch_initcall(activate_jump_labels);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
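/*
 * Allocate the per-CPU cpumasks used by the PV TLB-flush and PV IPI paths.
 * The allocation is skipped entirely when neither feature is in use.
 */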
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) static __init int kvm_alloc_cpumask(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) bool alloc = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) if (!kvm_para_available() || nopv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (pv_tlb_flush_supported())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) alloc = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) #if defined(CONFIG_SMP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) if (pv_ipi_supported())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) alloc = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) if (alloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) GFP_KERNEL, cpu_to_node(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) arch_initcall(kvm_alloc_cpumask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) #ifdef CONFIG_PARAVIRT_SPINLOCKS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) /* Kick a CPU by its APIC ID. Used to wake up a halted vCPU. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) static void kvm_kick_cpu(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) int apicid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) unsigned long flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) apicid = per_cpu(x86_cpu_to_apicid, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) #include <asm/qspinlock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
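/*
 * Wait callback for the paravirt qspinlock: with interrupts disabled, re-check
 * that the lock byte still holds the expected value and, if so, halt until the
 * lock holder kicks this vCPU via kvm_kick_cpu().
 */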
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) static void kvm_wait(u8 *ptr, u8 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) if (in_nmi())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) if (READ_ONCE(*ptr) != val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) 	 * Halt until it's our turn and we've been kicked. Use a safe halt when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) 	 * interrupts were enabled, so we don't hang if the lock info is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) 	 * overwritten in the irq slowpath and no spurious interrupt saves us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (arch_irqs_disabled_flags(flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) halt();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) safe_halt();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) #ifdef CONFIG_X86_32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) __visible bool __kvm_vcpu_is_preempted(long cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) struct kvm_steal_time *src = &per_cpu(steal_time, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) return !!(src->preempted & KVM_VCPU_PREEMPTED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) #include <asm/asm-offsets.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) extern bool __raw_callee_save___kvm_vcpu_is_preempted(long);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)  * Hand-optimized version for x86-64, avoiding the saving and restoring of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)  * eight 64-bit registers to/from the stack.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) */
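/*
 * Unlike the C variant above, the assembly treats any non-zero value of
 * steal_time.preempted as "preempted" rather than testing only the
 * KVM_VCPU_PREEMPTED bit.
 */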
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) asm(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) ".pushsection .text;"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) ".global __raw_callee_save___kvm_vcpu_is_preempted;"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) ".type __raw_callee_save___kvm_vcpu_is_preempted, @function;"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) "__raw_callee_save___kvm_vcpu_is_preempted:"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) "movq __per_cpu_offset(,%rdi,8), %rax;"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) "cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) "setne %al;"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) "ret;"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) ".size __raw_callee_save___kvm_vcpu_is_preempted, .-__raw_callee_save___kvm_vcpu_is_preempted;"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) ".popsection");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)  * Set up pv_ops.lock to exploit KVM_FEATURE_PV_UNHALT if present.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) void __init kvm_spinlock_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) 	 * Even when the host doesn't support KVM_FEATURE_PV_UNHALT there is an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) 	 * advantage to keeping virt_spin_lock_key enabled: virt_spin_lock() is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) 	 * preferred over the native qspinlock when the vCPU is preempted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) pr_info("PV spinlocks disabled, no host support\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) * Disable PV spinlocks and use native qspinlock when dedicated pCPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) * are available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) if (kvm_para_has_hint(KVM_HINTS_REALTIME)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) pr_info("PV spinlocks disabled with KVM_HINTS_REALTIME hints\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) if (num_possible_cpus() == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) pr_info("PV spinlocks disabled, single CPU\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) if (nopvspin) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) pr_info("PV spinlocks disabled, forced by \"nopvspin\" parameter\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) pr_info("PV spinlocks enabled\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) __pv_init_lock_hash();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) pv_ops.lock.queued_spin_lock_slowpath = __pv_queued_spin_lock_slowpath;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) pv_ops.lock.queued_spin_unlock =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) PV_CALLEE_SAVE(__pv_queued_spin_unlock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) pv_ops.lock.wait = kvm_wait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) pv_ops.lock.kick = kvm_kick_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) pv_ops.lock.vcpu_is_preempted =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) PV_CALLEE_SAVE(__kvm_vcpu_is_preempted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) 	 * When PV spinlocks are enabled they are preferred over
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) 	 * virt_spin_lock(), so virt_spin_lock_key's value is meaningless.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) 	 * Just disable it anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) static_branch_disable(&virt_spin_lock_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) #endif /* CONFIG_PARAVIRT_SPINLOCKS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) #ifdef CONFIG_ARCH_CPUIDLE_HALTPOLL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
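/*
 * MSR_KVM_POLL_CONTROL toggles host-side polling on HLT for this vCPU:
 * writing 0 asks the host not to poll (the guest polls itself via
 * cpuidle-haltpoll), writing 1 restores host-side polling.
 */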
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) static void kvm_disable_host_haltpoll(void *i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) wrmsrl(MSR_KVM_POLL_CONTROL, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) static void kvm_enable_host_haltpoll(void *i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) wrmsrl(MSR_KVM_POLL_CONTROL, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) void arch_haltpoll_enable(unsigned int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) pr_err_once("host does not support poll control\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) pr_err_once("host upgrade recommended\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	/* Enabling guest halt polling disables host halt polling */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) smp_call_function_single(cpu, kvm_disable_host_haltpoll, NULL, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) EXPORT_SYMBOL_GPL(arch_haltpoll_enable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) void arch_haltpoll_disable(unsigned int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) if (!kvm_para_has_feature(KVM_FEATURE_POLL_CONTROL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	/* Disabling guest halt polling re-enables host halt polling */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) smp_call_function_single(cpu, kvm_enable_host_haltpoll, NULL, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) EXPORT_SYMBOL_GPL(arch_haltpoll_disable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) #endif