// SPDX-License-Identifier: GPL-2.0
/*
 * check TSC synchronization.
 *
 * Copyright (C) 2006, Red Hat, Inc., Ingo Molnar
 *
 * We check whether all boot CPUs have their TSCs synchronized,
 * print a warning if not, and turn off the TSC clock-source.
 *
 * The warp-check is point-to-point between two CPUs, the CPU
 * initiating the bootup is the 'source CPU', the freshly booting
 * CPU is the 'target CPU'.
 *
 * Only two CPUs may participate - they can enter in any order.
 * ( The serial nature of the boot logic and the CPU hotplug lock
 *   protects against more than 2 CPUs entering this code. )
 */
#include <linux/topology.h>
#include <linux/spinlock.h>
#include <linux/kernel.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <asm/tsc.h>

struct tsc_adjust {
	s64		bootval;
	s64		adjusted;
	unsigned long	nextcheck;
	bool		warned;
};

static DEFINE_PER_CPU(struct tsc_adjust, tsc_adjust);
static struct timer_list tsc_sync_check_timer;

/*
 * TSCs on different sockets may be reset asynchronously.
 * This may cause the TSC ADJUST value on socket 0 not to be 0.
 */
bool __read_mostly tsc_async_resets;

void mark_tsc_async_resets(char *reason)
{
	if (tsc_async_resets)
		return;
	tsc_async_resets = true;
	pr_info("tsc: Marking TSC async resets true due to %s\n", reason);
}

void tsc_verify_tsc_adjust(bool resume)
{
	struct tsc_adjust *adj = this_cpu_ptr(&tsc_adjust);
	s64 curval;

	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		return;

	/* Skip unnecessary error messages if TSC already unstable */
	if (check_tsc_unstable())
		return;

	/* Rate limit the MSR check */
	if (!resume && time_before(jiffies, adj->nextcheck))
		return;

	adj->nextcheck = jiffies + HZ;

	rdmsrl(MSR_IA32_TSC_ADJUST, curval);
	if (adj->adjusted == curval)
		return;

	/* Restore the original value */
	wrmsrl(MSR_IA32_TSC_ADJUST, adj->adjusted);

	if (!adj->warned || resume) {
		pr_warn(FW_BUG "TSC ADJUST differs: CPU%u %lld --> %lld. Restoring\n",
			smp_processor_id(), adj->adjusted, curval);
		adj->warned = true;
	}
}

/*
 * Normally tsc_sync is checked every time the system enters idle
 * state, but there is still the caveat that a system won't enter
 * idle, either because it is too busy or purposely configured not
 * to enter idle.
 *
 * So set up a periodic timer (every 10 minutes) to make sure the
 * check is always on.
 */

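/* HZ is the number of jiffies per second, so HZ * 600 is 10 minutes in jiffies */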
#define SYNC_CHECK_INTERVAL (HZ * 600)

static void tsc_sync_check_timer_fn(struct timer_list *unused)
{
	int next_cpu;

	tsc_verify_tsc_adjust(false);

	/* Run the check for all onlined CPUs in turn */
	next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
	if (next_cpu >= nr_cpu_ids)
		next_cpu = cpumask_first(cpu_online_mask);

	tsc_sync_check_timer.expires += SYNC_CHECK_INTERVAL;
	add_timer_on(&tsc_sync_check_timer, next_cpu);
}

static int __init start_sync_check_timer(void)
{
	if (!cpu_feature_enabled(X86_FEATURE_TSC_ADJUST) || tsc_clocksource_reliable)
		return 0;

	timer_setup(&tsc_sync_check_timer, tsc_sync_check_timer_fn, 0);
	tsc_sync_check_timer.expires = jiffies + SYNC_CHECK_INTERVAL;
	add_timer(&tsc_sync_check_timer);

	return 0;
}
late_initcall(start_sync_check_timer);

static void tsc_sanitize_first_cpu(struct tsc_adjust *cur, s64 bootval,
				   unsigned int cpu, bool bootcpu)
{
	/*
	 * First online CPU in a package stores the boot value in the
	 * adjustment value. This value might change later via the sync
	 * mechanism. If that fails we still can yell about boot values not
	 * being consistent.
	 *
	 * On the boot CPU we just force-set the ADJUST value to 0 if it is
	 * non-zero. We don't do that on non-boot CPUs because physical
	 * hotplug should have set the ADJUST register to a value > 0, so
	 * the TSC is in sync with the already running CPUs.
	 *
	 * Also don't force the ADJUST value to zero if that is a valid value
	 * for socket 0 as determined by the system arch. This is required
	 * when multiple sockets are reset asynchronously with each other
	 * and socket 0 may not have a TSC ADJUST value of 0.
	 */
	if (bootcpu && bootval != 0) {
		if (likely(!tsc_async_resets)) {
			pr_warn(FW_BUG "TSC ADJUST: CPU%u: %lld force to 0\n",
				cpu, bootval);
			wrmsrl(MSR_IA32_TSC_ADJUST, 0);
			bootval = 0;
		} else {
			pr_info("TSC ADJUST: CPU%u: %lld NOT forced to 0\n",
				cpu, bootval);
		}
	}
	cur->adjusted = bootval;
}

#ifndef CONFIG_SMP
bool __init tsc_store_and_check_tsc_adjust(bool bootcpu)
{
	struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
	s64 bootval;

	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		return false;

	/* Skip unnecessary error messages if TSC already unstable */
	if (check_tsc_unstable())
		return false;

	rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
	cur->bootval = bootval;
	cur->nextcheck = jiffies + HZ;
	tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(), bootcpu);
	return false;
}

#else /* !CONFIG_SMP */

/*
 * Store and check the TSC ADJUST MSR if available
 */
bool tsc_store_and_check_tsc_adjust(bool bootcpu)
{
	struct tsc_adjust *ref, *cur = this_cpu_ptr(&tsc_adjust);
	unsigned int refcpu, cpu = smp_processor_id();
	struct cpumask *mask;
	s64 bootval;

	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		return false;

	rdmsrl(MSR_IA32_TSC_ADJUST, bootval);
	cur->bootval = bootval;
	cur->nextcheck = jiffies + HZ;
	cur->warned = false;

	/*
	 * If a non-zero TSC ADJUST value for socket 0 may be valid then the
	 * default adjusted value cannot be assumed to be zero either.
	 */
	if (tsc_async_resets)
		cur->adjusted = bootval;

	/*
	 * Check whether this CPU is the first in a package to come up. In
	 * this case do not check the boot value against another package
	 * because the new package might have been physically hotplugged,
	 * where TSC_ADJUST is expected to be different. When called on the
	 * boot CPU topology_core_cpumask() might not be available yet.
	 */
	mask = topology_core_cpumask(cpu);
	refcpu = mask ? cpumask_any_but(mask, cpu) : nr_cpu_ids;

	if (refcpu >= nr_cpu_ids) {
		tsc_sanitize_first_cpu(cur, bootval, smp_processor_id(),
				       bootcpu);
		return false;
	}

	ref = per_cpu_ptr(&tsc_adjust, refcpu);
	/*
	 * Compare the boot value and complain if it differs in the
	 * package.
	 */
	if (bootval != ref->bootval)
		printk_once(FW_BUG "TSC ADJUST differs within socket(s), fixing all errors\n");

	/*
	 * The TSC_ADJUST values in a package must be the same. If the boot
	 * value on this newly upcoming CPU differs from the adjustment
	 * value of the already online CPU in this package, set it to that
	 * adjusted value.
	 */
	if (bootval != ref->adjusted) {
		cur->adjusted = ref->adjusted;
		wrmsrl(MSR_IA32_TSC_ADJUST, ref->adjusted);
	}
	/*
	 * We have the TSCs forced to be in sync on this package. Skip sync
	 * test:
	 */
	return true;
}

/*
 * Entry/exit counters that make sure that both CPUs
 * run the measurement code at once:
 */
static atomic_t start_count;
static atomic_t stop_count;
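/*
 * skip_test is raised by the target CPU when it decides to skip the warp
 * test; test_runs holds the number of measurement attempts left before the
 * source gives up.
 */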
static atomic_t skip_test;
static atomic_t test_runs;

/*
 * We use a raw spinlock in this exceptional case, because
 * we want to have the fastest, inlined, non-debug version
 * of a critical section, to be able to prove TSC time-warps:
 */
static arch_spinlock_t sync_lock = __ARCH_SPIN_LOCK_UNLOCKED;

static cycles_t last_tsc;
static cycles_t max_warp;
static int nr_warps;
static int random_warps;

/*
 * TSC-warp measurement loop running on both CPUs. This is not called
 * if there is no TSC.
 */
static cycles_t check_tsc_warp(unsigned int timeout)
{
	cycles_t start, now, prev, end, cur_max_warp = 0;
	int i, cur_warps = 0;

	start = rdtsc_ordered();
	/*
	 * The measurement runs for 'timeout' msecs:
	 */
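	/* tsc_khz is the TSC frequency in kHz, i.e. TSC cycles per millisecond */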
	end = start + (cycles_t) tsc_khz * timeout;

	for (i = 0; ; i++) {
		/*
		 * We take the global lock, measure TSC, save the
		 * previous TSC that was measured (possibly on
		 * another CPU) and update the previous TSC timestamp.
		 */
		arch_spin_lock(&sync_lock);
		prev = last_tsc;
		now = rdtsc_ordered();
		last_tsc = now;
		arch_spin_unlock(&sync_lock);

		/*
		 * Be nice every now and then (and also check whether
		 * the measurement is done [we also insert a 10 million
		 * iteration safety exit, so we don't lock up in case the
		 * TSC readout is totally broken]):
		 */
		if (unlikely(!(i & 7))) {
			if (now > end || i > 10000000)
				break;
			cpu_relax();
			touch_nmi_watchdog();
		}
		/*
		 * Outside the critical section we can now see whether
		 * we saw a time-warp of the TSC going backwards:
		 */
		if (unlikely(prev > now)) {
			arch_spin_lock(&sync_lock);
			max_warp = max(max_warp, prev - now);
			cur_max_warp = max_warp;
			/*
			 * Check whether this bounces back and forth. Only
			 * one CPU should observe time going backwards.
			 */
			if (cur_warps != nr_warps)
				random_warps++;
			nr_warps++;
			cur_warps = nr_warps;
			arch_spin_unlock(&sync_lock);
		}
	}
	WARN(!(now-start),
	     "Warning: zero tsc calibration delta: %Ld [max: %Ld]\n",
	     now-start, end-start);
	return cur_max_warp;
}

/*
 * If the target CPU coming online doesn't have any of its core-siblings
 * online, a timeout of 20 msec will be used for the TSC-warp measurement
 * loop. Otherwise a smaller timeout of 2 msec will be used, as we have some
 * information about this socket already (and this information grows as we
 * have more and more logical-siblings in that socket).
 *
 * Ideally we should be able to skip the TSC sync check on the other
 * core-siblings, if the first logical CPU in a socket passed the sync test.
 * But as the TSC is per-logical CPU and can potentially be modified wrongly
 * by the BIOS, a TSC sync test of smaller duration should be able
 * to catch such errors. Also this will catch the condition where all the
 * cores in the socket don't get reset at the same time.
 */
static inline unsigned int loop_timeout(int cpu)
{
	return (cpumask_weight(topology_core_cpumask(cpu)) > 1) ? 2 : 20;
}

/*
 * Source CPU calls into this - it waits for the freshly booted
 * target CPU to arrive and then starts the measurement:
 */
void check_tsc_sync_source(int cpu)
{
	int cpus = 2;

	/*
	 * No need to check if we already know that the TSC is not
	 * synchronized or if we have no TSC.
	 */
	if (unsynchronized_tsc())
		return;

	/*
	 * Set the maximum number of test runs to
	 *  1 if the CPU does not provide the TSC_ADJUST MSR
	 *  3 if the MSR is available, so the target can try to adjust
	 */
	if (!boot_cpu_has(X86_FEATURE_TSC_ADJUST))
		atomic_set(&test_runs, 1);
	else
		atomic_set(&test_runs, 3);
retry:
	/*
	 * Wait for the target to start or to skip the test:
	 */
	while (atomic_read(&start_count) != cpus - 1) {
		if (atomic_read(&skip_test) > 0) {
			atomic_set(&skip_test, 0);
			return;
		}
		cpu_relax();
	}

	/*
	 * Trigger the target to continue into the measurement too:
	 */
	atomic_inc(&start_count);

	check_tsc_warp(loop_timeout(cpu));

	while (atomic_read(&stop_count) != cpus-1)
		cpu_relax();

	/*
	 * If the test was successful, set the number of runs to zero and
	 * stop. If not, decrement the number of runs and check whether we
	 * can retry. In case of random warps no retry is attempted.
	 */
	if (!nr_warps) {
		atomic_set(&test_runs, 0);

		pr_debug("TSC synchronization [CPU#%d -> CPU#%d]: passed\n",
			smp_processor_id(), cpu);

	} else if (atomic_dec_and_test(&test_runs) || random_warps) {
		/* Force it to 0 if random warps brought us here */
		atomic_set(&test_runs, 0);

		pr_warn("TSC synchronization [CPU#%d -> CPU#%d]:\n",
			smp_processor_id(), cpu);
		pr_warn("Measured %Ld cycles TSC warp between CPUs, "
			"turning off TSC clock.\n", max_warp);
		if (random_warps)
			pr_warn("TSC warped randomly between CPUs\n");
		mark_tsc_unstable("check_tsc_sync_source failed");
	}

	/*
	 * Reset it - just in case we boot another CPU later:
	 */
	atomic_set(&start_count, 0);
	random_warps = 0;
	nr_warps = 0;
	max_warp = 0;
	last_tsc = 0;

	/*
	 * Let the target continue with the bootup:
	 */
	atomic_inc(&stop_count);

	/*
	 * Retry, if there is a chance to do so.
	 */
	if (atomic_read(&test_runs) > 0)
		goto retry;
}

/*
 * Freshly booted CPUs call into this:
 */
void check_tsc_sync_target(void)
{
	struct tsc_adjust *cur = this_cpu_ptr(&tsc_adjust);
	unsigned int cpu = smp_processor_id();
	cycles_t cur_max_warp, gbl_max_warp;
	int cpus = 2;

	/* Also aborts if there is no TSC. */
	if (unsynchronized_tsc())
		return;

	/*
	 * Store, verify and sanitize the TSC adjust register. If
	 * successful, skip the test.
	 *
	 * The test is also skipped when the TSC is marked reliable. This
	 * is true for SoCs which have no fallback clocksource. On these
	 * SoCs the TSC is frequency synchronized, but still the TSC ADJUST
	 * register might have been wrecked by the BIOS.
	 */
	if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) {
		atomic_inc(&skip_test);
		return;
	}

retry:
	/*
	 * Register this CPU's participation and wait for the
	 * source CPU to start the measurement:
	 */
	atomic_inc(&start_count);
	while (atomic_read(&start_count) != cpus)
		cpu_relax();

	cur_max_warp = check_tsc_warp(loop_timeout(cpu));

	/*
	 * Store the maximum observed warp value for a potential retry:
	 */
	gbl_max_warp = max_warp;

	/*
	 * Ok, we are done:
	 */
	atomic_inc(&stop_count);

	/*
	 * Wait for the source CPU to print stuff:
	 */
	while (atomic_read(&stop_count) != cpus)
		cpu_relax();

	/*
	 * Reset it for the next sync test:
	 */
	atomic_set(&stop_count, 0);

	/*
	 * Check the number of remaining test runs. If not zero, the test
	 * failed and a retry with adjusted TSC is possible. If zero the
	 * test was either successful or failed terminally.
	 */
	if (!atomic_read(&test_runs))
		return;

	/*
	 * If the warp value of this CPU is 0, then the other CPU
	 * observed time going backwards so this TSC was ahead and
	 * needs to move backwards.
	 */
	if (!cur_max_warp)
		cur_max_warp = -gbl_max_warp;

	/*
	 * Add the result to the previous adjustment value.
	 *
	 * The adjustment value is slightly off by the overhead of the
	 * sync mechanism (observed values are ~200 TSC cycles), but this
	 * really depends on CPU, node distance and frequency. So
	 * compensating for this is hard to get right. Experiments show
	 * that the warp is no longer detectable when the observed warp
	 * value is used. In the worst case the adjustment needs to go
	 * through a 3rd run for fine tuning.
	 */
	cur->adjusted += cur_max_warp;

	pr_warn("TSC ADJUST compensate: CPU%u observed %lld warp. Adjust: %lld\n",
		cpu, cur_max_warp, cur->adjusted);

	wrmsrl(MSR_IA32_TSC_ADJUST, cur->adjusted);
	goto retry;

}

#endif /* CONFIG_SMP */