// SPDX-License-Identifier: GPL-2.0-or-later
/* paravirtual clock -- common code used by kvm/xen */

#include <linux/clocksource.h>
#include <linux/kernel.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/sched.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/nmi.h>

#include <asm/fixmap.h>
#include <asm/pvclock.h>
#include <asm/vgtod.h>

static u8 valid_flags __read_mostly;
static struct pvclock_vsyscall_time_info *pvti_cpu0_va __read_mostly;

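/*
 * Record which pvclock flag bits the hypervisor is known to support;
 * flags read from the shared page are masked with this set in
 * pvclock_read_flags() below.
 */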
void pvclock_set_flags(u8 flags)
{
	valid_flags = flags;
}

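/*
 * Derive the guest's TSC frequency in kHz from the scaling factors the
 * hypervisor publishes. The pvclock ABI converts a (pre-shifted) TSC
 * delta to nanoseconds as (delta * tsc_to_system_mul) >> 32, so
 * inverting it gives khz = (10^6 << 32) / tsc_to_system_mul, with
 * tsc_shift applied in the opposite direction.
 */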
unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src)
{
	u64 pv_tsc_khz = 1000000ULL << 32;

	do_div(pv_tsc_khz, src->tsc_to_system_mul);
	if (src->tsc_shift < 0)
		pv_tsc_khz <<= -src->tsc_shift;
	else
		pv_tsc_khz >>= src->tsc_shift;
	return pv_tsc_khz;
}

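/*
 * Pet every watchdog that could misfire after the guest has been
 * stopped: a paused VM sees time jump forward when it resumes, which
 * the soft lockup detector, the clocksource watchdog, the RCU stall
 * detector and the hung task detector would otherwise report as
 * stalls.
 */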
void pvclock_touch_watchdogs(void)
{
	touch_softlockup_watchdog_sync();
	clocksource_touch_watchdog();
	rcu_cpu_stall_reset();
	reset_hung_task_detector();
}

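/*
 * Fallback used when PVCLOCK_TSC_STABLE_BIT is not set: the highest
 * time value handed out so far, shared by all vcpus so that
 * pvclock_clocksource_read() stays monotonic even when the per-vcpu
 * clocks are slightly out of step. Reset on resume, presumably because
 * the clock may restart from a lower value.
 */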
static atomic64_t last_value = ATOMIC64_INIT(0);

void pvclock_resume(void)
{
	atomic64_set(&last_value, 0);
}

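/*
 * Read the flags field under the pvclock version protocol: an odd
 * version means the hypervisor is mid-update, and a version change
 * across the read means the data was updated underneath us, so retry.
 */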
u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
{
	unsigned int version;
	u8 flags;

	do {
		version = pvclock_read_begin(src);
		flags = src->flags;
	} while (pvclock_read_retry(src, version));

	return flags & valid_flags;
}

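/*
 * Read the current time from the vcpu's pvclock time info. The raw
 * value is derived from the local TSC; unless both sides agree that
 * the TSC is stable across vcpus (PVCLOCK_TSC_STABLE_BIT), the result
 * is clamped against the global last_value accumulator below so that
 * a task migrating between vcpus never sees time go backwards.
 */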
u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
{
	unsigned int version;
	u64 ret;
	u64 last;
	u8 flags;

	do {
		version = pvclock_read_begin(src);
		ret = __pvclock_read_cycles(src, rdtsc_ordered());
		flags = src->flags;
	} while (pvclock_read_retry(src, version));

	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
		src->flags &= ~PVCLOCK_GUEST_STOPPED;
		pvclock_touch_watchdogs();
	}

	if ((valid_flags & PVCLOCK_TSC_STABLE_BIT) &&
		(flags & PVCLOCK_TSC_STABLE_BIT))
		return ret;

	/*
	 * The assumption here is that last_value, a global accumulator,
	 * always goes forward. If we are less than that, we should not be
	 * much smaller. We assume there is an error margin we're inside,
	 * and then the correction does not sacrifice accuracy.
	 *
	 * For reads: the global value may have changed between the test and
	 * the return, but that only means someone else poked the clock at a
	 * later time. We just need to make sure we never see a backwards
	 * event.
	 *
	 * For updates: a plain "last_value = ret" is not enough, since two
	 * vcpus could be updating at the same time and one of them could be
	 * slightly behind, which would break the assumption that last_value
	 * always goes forward.
	 */
	last = atomic64_read(&last_value);
	do {
		if (ret < last)
			return last;
		last = atomic64_cmpxchg(&last_value, last, ret);
	} while (unlikely(last != ret));

	return ret;
}

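/*
 * Compute the current wall clock time: the hypervisor publishes the
 * wall clock value at system boot in *wall_clock (protected by the
 * same even/odd version protocol), and adding the time since boot
 * from *vcpu_time yields the current time of day.
 */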
void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock,
			    struct pvclock_vcpu_time_info *vcpu_time,
			    struct timespec64 *ts)
{
	u32 version;
	u64 delta;
	struct timespec64 now;

	/* get wallclock at system boot */
	do {
		version = wall_clock->version;
		rmb();	/* fetch version before time */
		/*
		 * Note: wall_clock->sec is a u32 value, so it can
		 * only store dates between 1970 and 2106. To allow
		 * times beyond that, we need to create a new hypercall
		 * interface with an extended pvclock_wall_clock structure
		 * like ARM has.
		 */
		now.tv_sec = wall_clock->sec;
		now.tv_nsec = wall_clock->nsec;
		rmb();	/* fetch time before checking version */
	} while ((wall_clock->version & 1) || (version != wall_clock->version));

	delta = pvclock_clocksource_read(vcpu_time);	/* time since system boot */
	delta += now.tv_sec * NSEC_PER_SEC + now.tv_nsec;

	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
	now.tv_sec = delta;

	set_normalized_timespec64(ts, now.tv_sec, now.tv_nsec);
}

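/*
 * Publish vcpu0's vsyscall time info page for the vDSO. This must
 * happen before the PVCLOCK vDSO clock mode is put to use, hence the
 * warning if that mode has already been observed.
 */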
void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti)
{
	WARN_ON(vclock_was_used(VDSO_CLOCKMODE_PVCLOCK));
	pvti_cpu0_va = pvti;
}

struct pvclock_vsyscall_time_info *pvclock_get_pvti_cpu0_va(void)
{
	return pvti_cpu0_va;
}
EXPORT_SYMBOL_GPL(pvclock_get_pvti_cpu0_va);