// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * KVM paravirtual clock driver. A clocksource implementation.
 *
 * Copyright (C) 2008 Glauber de Oliveira Costa, Red Hat Inc.
 */

#include <linux/clocksource.h>
#include <linux/kvm_para.h>
#include <asm/pvclock.h>
#include <asm/msr.h>
#include <asm/apic.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/cpuhotplug.h>
#include <linux/sched.h>
#include <linux/sched/clock.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/set_memory.h>

#include <asm/hypervisor.h>
#include <asm/mem_encrypt.h>
#include <asm/x86_init.h>
#include <asm/kvmclock.h>

static int kvmclock __initdata = 1;
static int kvmclock_vsyscall __initdata = 1;
static int msr_kvm_system_time __ro_after_init = MSR_KVM_SYSTEM_TIME;
static int msr_kvm_wall_clock __ro_after_init = MSR_KVM_WALL_CLOCK;
static u64 kvm_sched_clock_offset __ro_after_init;

static int __init parse_no_kvmclock(char *arg)
{
	kvmclock = 0;
	return 0;
}
early_param("no-kvmclock", parse_no_kvmclock);

static int __init parse_no_kvmclock_vsyscall(char *arg)
{
	kvmclock_vsyscall = 0;
	return 0;
}
early_param("no-kvmclock-vsyscall", parse_no_kvmclock_vsyscall);

/* Aligned to page sizes to match what's mapped via vsyscalls to userspace */
#define HV_CLOCK_SIZE	(sizeof(struct pvclock_vsyscall_time_info) * NR_CPUS)
#define HVC_BOOT_ARRAY_SIZE \
	(PAGE_SIZE / sizeof(struct pvclock_vsyscall_time_info))
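/*
 * Worked example (illustrative, assuming the usual x86 values): with 4 KiB
 * pages and a 64-byte, cacheline-aligned struct pvclock_vsyscall_time_info,
 * HVC_BOOT_ARRAY_SIZE is 4096 / 64 = 64, i.e. the first 64 CPUs fit into the
 * statically allocated hv_clock_boot page below; any further CPUs are served
 * from hvclock_mem, which kvmclock_init_mem() allocates at boot.
 */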

static struct pvclock_vsyscall_time_info
			hv_clock_boot[HVC_BOOT_ARRAY_SIZE] __bss_decrypted __aligned(PAGE_SIZE);
static struct pvclock_wall_clock wall_clock __bss_decrypted;
static struct pvclock_vsyscall_time_info *hvclock_mem;
DEFINE_PER_CPU(struct pvclock_vsyscall_time_info *, hv_clock_per_cpu);
EXPORT_PER_CPU_SYMBOL_GPL(hv_clock_per_cpu);

/*
 * The wallclock is the time of day when we booted. Some time may have
 * elapsed since the hypervisor wrote the data, so we account for that
 * with the system time.
 */
static void kvm_get_wallclock(struct timespec64 *now)
{
	wrmsrl(msr_kvm_wall_clock, slow_virt_to_phys(&wall_clock));
	preempt_disable();
	pvclock_read_wallclock(&wall_clock, this_cpu_pvti(), now);
	preempt_enable();
}

static int kvm_set_wallclock(const struct timespec64 *now)
{
	return -ENODEV;
}

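/*
 * Read this CPU's pvclock time info. Preemption is disabled so the read
 * cannot be migrated to another vCPU halfway through and mix two vCPUs'
 * time data; the notrace variants are used since this also backs
 * sched_clock, which the tracing machinery itself calls.
 */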
static u64 kvm_clock_read(void)
{
	u64 ret;

	preempt_disable_notrace();
	ret = pvclock_clocksource_read(this_cpu_pvti());
	preempt_enable_notrace();
	return ret;
}

static u64 kvm_clock_get_cycles(struct clocksource *cs)
{
	return kvm_clock_read();
}

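/*
 * sched_clock reports kvmclock time relative to the value sampled in
 * kvm_sched_clock_init(), so it starts near zero at boot. For example
 * (illustrative numbers): if kvmclock read 5,000,000 ns at init time, a
 * later raw reading of 5,003,000 ns is reported as a sched_clock value
 * of 3,000 ns.
 */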
static u64 kvm_sched_clock_read(void)
{
	return kvm_clock_read() - kvm_sched_clock_offset;
}

static inline void kvm_sched_clock_init(bool stable)
{
	if (!stable)
		clear_sched_clock_stable();
	kvm_sched_clock_offset = kvm_clock_read();
	pv_ops.time.sched_clock = kvm_sched_clock_read;

	pr_info("kvm-clock: using sched offset of %llu cycles",
		kvm_sched_clock_offset);

	BUILD_BUG_ON(sizeof(kvm_sched_clock_offset) >
		     sizeof(((struct pvclock_vcpu_time_info *)NULL)->system_time));
}

/*
 * If we don't preset lpj, the guest may calibrate it under heavy load -
 * thus getting a lower lpj - and then execute its delay loops without
 * load. This is wrong, because the delays would then complete too early.
 * Any heuristic is bound to fail, because ultimately a large pool of
 * guests can be running and disturbing each other. So we preset lpj here
 * from the TSC frequency reported by the hypervisor.
 */
static unsigned long kvm_get_tsc_khz(void)
{
	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
	return pvclock_tsc_khz(this_cpu_pvti());
}

static void __init kvm_get_preset_lpj(void)
{
	unsigned long khz;
	u64 lpj;

	khz = kvm_get_tsc_khz();

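	/*
	 * lpj = khz * 1000 / HZ. For example (illustrative numbers), a
	 * 3,000,000 kHz (3 GHz) TSC with HZ=1000 presets lpj to
	 * 3,000,000,000 / 1000 = 3,000,000 loops per jiffy.
	 */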
	lpj = ((u64)khz * 1000);
	do_div(lpj, HZ);
	preset_lpj = lpj;
}

bool kvm_check_and_clear_guest_paused(void)
{
	struct pvclock_vsyscall_time_info *src = this_cpu_hvclock();
	bool ret = false;

	if (!src)
		return ret;

	if ((src->pvti.flags & PVCLOCK_GUEST_STOPPED) != 0) {
		src->pvti.flags &= ~PVCLOCK_GUEST_STOPPED;
		pvclock_touch_watchdogs();
		ret = true;
	}
	return ret;
}

static int kvm_cs_enable(struct clocksource *cs)
{
	vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
	return 0;
}

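/*
 * A rating of 400 puts kvm-clock in the top band described in
 * include/linux/clocksource.h, so it is preferred by default.
 * kvmclock_init() below lowers the rating to 299 when the host exposes an
 * invariant TSC, letting the TSC clocksource win instead.
 */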
struct clocksource kvm_clock = {
	.name	= "kvm-clock",
	.read	= kvm_clock_get_cycles,
	.rating	= 400,
	.mask	= CLOCKSOURCE_MASK(64),
	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
	.enable	= kvm_cs_enable,
};
EXPORT_SYMBOL_GPL(kvm_clock);

static void kvm_register_clock(char *txt)
{
	struct pvclock_vsyscall_time_info *src = this_cpu_hvclock();
	u64 pa;

	if (!src)
		return;

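	/*
	 * Per the kvmclock MSR ABI (see Documentation/virt/kvm/msr.rst),
	 * the guest writes the physical address of its pvclock structure
	 * with bit 0 set as the enable flag, hence the "| 0x01" below.
	 */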
	pa = slow_virt_to_phys(&src->pvti) | 0x01ULL;
	wrmsrl(msr_kvm_system_time, pa);
	pr_info("kvm-clock: cpu %d, msr %llx, %s", smp_processor_id(), pa, txt);
}

static void kvm_save_sched_clock_state(void)
{
}

static void kvm_restore_sched_clock_state(void)
{
	kvm_register_clock("primary cpu clock, resume");
}

#ifdef CONFIG_X86_LOCAL_APIC
static void kvm_setup_secondary_clock(void)
{
	kvm_register_clock("secondary cpu clock");
}
#endif

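/*
 * Writing 0 clears the enable bit, telling the hypervisor to stop updating
 * this vCPU's pvclock area, so that (e.g. across shutdown or kexec) the old
 * kernel's memory is no longer written to by the host.
 */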
void kvmclock_disable(void)
{
	native_write_msr(msr_kvm_system_time, 0, 0);
}

static void __init kvmclock_init_mem(void)
{
	unsigned long ncpus;
	unsigned int order;
	struct page *p;
	int r;

	if (HVC_BOOT_ARRAY_SIZE >= num_possible_cpus())
		return;

	ncpus = num_possible_cpus() - HVC_BOOT_ARRAY_SIZE;
	order = get_order(ncpus * sizeof(*hvclock_mem));
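	/*
	 * Illustrative sizing, assuming 64-byte entries and 4 KiB pages:
	 * with 256 possible CPUs and 64 boot-array slots, the remaining 192
	 * entries need 192 * 64 = 12 KiB, so get_order() returns 2 and a
	 * 16 KiB block is allocated below.
	 */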

	p = alloc_pages(GFP_KERNEL, order);
	if (!p) {
		pr_warn("%s: failed to alloc %d pages", __func__, (1U << order));
		return;
	}

	hvclock_mem = page_address(p);

	/*
	 * hvclock is shared between the guest and the hypervisor, so it
	 * must be mapped decrypted.
	 */
	if (sev_active()) {
		r = set_memory_decrypted((unsigned long) hvclock_mem,
					 1UL << order);
		if (r) {
			__free_pages(p, order);
			hvclock_mem = NULL;
			pr_warn("kvmclock: set_memory_decrypted() failed. Disabling\n");
			return;
		}
	}

	memset(hvclock_mem, 0, PAGE_SIZE << order);
}

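/*
 * vDSO reads happen in userspace, where a task can be preempted and migrated
 * between reads, so the pvclock-based vDSO clock mode is only enabled when
 * the hypervisor marks the TSC stable across vCPUs (PVCLOCK_TSC_STABLE_BIT);
 * otherwise kvm-clock remains a kernel-only clocksource.
 */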
static int __init kvm_setup_vsyscall_timeinfo(void)
{
	kvmclock_init_mem();

#ifdef CONFIG_X86_64
	if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) {
		u8 flags;

		flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
		if (!(flags & PVCLOCK_TSC_STABLE_BIT))
			return 0;

		kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
	}
#endif

	return 0;
}
early_initcall(kvm_setup_vsyscall_timeinfo);

static int kvmclock_setup_percpu(unsigned int cpu)
{
	struct pvclock_vsyscall_time_info *p = per_cpu(hv_clock_per_cpu, cpu);

	/*
	 * The per-cpu area setup replicates CPU0's pointer to all CPUs'
	 * per-cpu pointers, so check carefully before overwriting. CPU0
	 * itself has already been set up in kvmclock_init().
	 */
	if (!cpu || (p && p != per_cpu(hv_clock_per_cpu, 0)))
		return 0;

	/* Use the static page for the first CPUs, allocate otherwise */
	if (cpu < HVC_BOOT_ARRAY_SIZE)
		p = &hv_clock_boot[cpu];
	else if (hvclock_mem)
		p = hvclock_mem + cpu - HVC_BOOT_ARRAY_SIZE;
	else
		return -ENOMEM;

	per_cpu(hv_clock_per_cpu, cpu) = p;
	return p ? 0 : -ENOMEM;
}

void __init kvmclock_init(void)
{
	u8 flags;

	if (!kvm_para_available() || !kvmclock)
		return;

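	/*
	 * Prefer the KVM-specific "_NEW" MSR pair advertised by
	 * KVM_FEATURE_CLOCKSOURCE2; fall back to the legacy MSRs only if
	 * the original KVM_FEATURE_CLOCKSOURCE is present, otherwise there
	 * is no kvmclock at all and we bail out.
	 */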
	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE2)) {
		msr_kvm_system_time = MSR_KVM_SYSTEM_TIME_NEW;
		msr_kvm_wall_clock = MSR_KVM_WALL_CLOCK_NEW;
	} else if (!kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) {
		return;
	}

	if (cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "kvmclock:setup_percpu",
			      kvmclock_setup_percpu, NULL) < 0) {
		return;
	}

	pr_info("kvm-clock: Using msrs %x and %x",
		msr_kvm_system_time, msr_kvm_wall_clock);

	this_cpu_write(hv_clock_per_cpu, &hv_clock_boot[0]);
	kvm_register_clock("primary cpu clock");
	pvclock_set_pvti_cpu0_va(hv_clock_boot);

	if (kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE_STABLE_BIT))
		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);

	flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
	kvm_sched_clock_init(flags & PVCLOCK_TSC_STABLE_BIT);

	x86_platform.calibrate_tsc = kvm_get_tsc_khz;
	x86_platform.calibrate_cpu = kvm_get_tsc_khz;
	x86_platform.get_wallclock = kvm_get_wallclock;
	x86_platform.set_wallclock = kvm_set_wallclock;
#ifdef CONFIG_X86_LOCAL_APIC
	x86_cpuinit.early_percpu_clock_init = kvm_setup_secondary_clock;
#endif
	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
	kvm_get_preset_lpj();

	/*
	 * X86_FEATURE_NONSTOP_TSC means the TSC runs at a constant rate
	 * across P- and T-states and does not stop in deep C-states.
	 *
	 * A host that exposes an invariant TSC means kvmclock is not
	 * necessary: the TSC can be used as the clocksource instead.
	 */
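	/*
	 * 299 keeps kvm-clock just below the TSC clocksource's default
	 * rating (300 in arch/x86/kernel/tsc.c at the time of writing), so
	 * the TSC is preferred whenever it is trustworthy.
	 */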
	if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
	    boot_cpu_has(X86_FEATURE_NONSTOP_TSC) &&
	    !check_tsc_unstable())
		kvm_clock.rating = 299;

	clocksource_register_hz(&kvm_clock, NSEC_PER_SEC);
	pv_info.name = "KVM";
}