// SPDX-License-Identifier: GPL-2.0-only
/*
 * itmt.c: Support Intel Turbo Boost Max Technology 3.0
 *
 * (C) Copyright 2016 Intel Corporation
 * Author: Tim Chen <tim.c.chen@linux.intel.com>
 *
 * On platforms supporting Intel Turbo Boost Max Technology 3.0 (ITMT),
 * the maximum turbo frequencies of some cores in a CPU package may be
 * higher than for the other cores in the same package. In that case,
 * better performance can be achieved by making the scheduler prefer
 * to run tasks on the CPUs with higher max turbo frequencies.
 *
 * This file provides functions and data structures for enabling the
 * scheduler to favor scheduling on cores that can be boosted to a higher
 * frequency under ITMT.
 */

#include <linux/sched.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/mutex.h>
#include <linux/sysctl.h>
#include <linux/nodemask.h>

static DEFINE_MUTEX(itmt_update_mutex);
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);

/* Boolean to track if system has ITMT capabilities */
static bool __read_mostly sched_itmt_capable;

/*
 * Boolean to control whether we want to move processes to CPUs capable
 * of higher turbo frequency on platforms supporting Intel Turbo Boost Max
 * Technology 3.0.
 *
 * It can be set via /proc/sys/kernel/sched_itmt_enabled
 */
unsigned int __read_mostly sysctl_sched_itmt_enabled;
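
/*
 * For illustration, a hypothetical root shell session toggling the knob
 * (the only valid values are 0 and 1):
 *
 *	cat /proc/sys/kernel/sched_itmt_enabled
 *	echo 0 > /proc/sys/kernel/sched_itmt_enabled
 *
 * Writes go through sched_itmt_update_handler() below, which returns
 * -EINVAL if the platform is not ITMT capable and rebuilds the sched
 * domains whenever the value actually changes.
 */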

static int sched_itmt_update_handler(struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int old_sysctl;
	int ret;

	mutex_lock(&itmt_update_mutex);

	if (!sched_itmt_capable) {
		mutex_unlock(&itmt_update_mutex);
		return -EINVAL;
	}

	old_sysctl = sysctl_sched_itmt_enabled;
	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);

	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
		x86_topology_update = true;
		rebuild_sched_domains();
	}

	mutex_unlock(&itmt_update_mutex);

	return ret;
}

static struct ctl_table itmt_kern_table[] = {
	{
		.procname	= "sched_itmt_enabled",
		.data		= &sysctl_sched_itmt_enabled,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_itmt_update_handler,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
	{}
};

static struct ctl_table itmt_root_table[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= itmt_kern_table,
	},
	{}
};

static struct ctl_table_header *itmt_sysctl_header;

/**
 * sched_set_itmt_support() - Indicate platform supports ITMT
 *
 * This function is used by the OS to indicate to scheduler that the platform
 * is capable of supporting the ITMT feature.
 *
 * The current scheme has the pstate driver detect whether the system
 * is ITMT capable and call sched_set_itmt_support().
 *
 * This must be done only after sched_set_itmt_core_prio()
 * has been called to set the CPUs' priorities.
 * It must not be called with the CPU hotplug lock held, as that
 * lock has to be acquired later to rebuild the sched domains.
 *
 * Return: 0 on success
 */
int sched_set_itmt_support(void)
{
	mutex_lock(&itmt_update_mutex);

	if (sched_itmt_capable) {
		mutex_unlock(&itmt_update_mutex);
		return 0;
	}

	itmt_sysctl_header = register_sysctl_table(itmt_root_table);
	if (!itmt_sysctl_header) {
		mutex_unlock(&itmt_update_mutex);
		return -ENOMEM;
	}

	sched_itmt_capable = true;

	sysctl_sched_itmt_enabled = 1;

	x86_topology_update = true;
	rebuild_sched_domains();

	mutex_unlock(&itmt_update_mutex);

	return 0;
}
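
/*
 * A minimal sketch of how a cpufreq driver might enable ITMT support.
 * This is illustrative only; read_core_prio() is a hypothetical stand-in
 * for however the driver obtains each core's maximum turbo ratio:
 *
 *	for_each_online_cpu(cpu)
 *		sched_set_itmt_core_prio(read_core_prio(cpu), cpu);
 *	sched_set_itmt_support();
 *
 * The priorities must be set first, because sched_set_itmt_support()
 * immediately rebuilds the sched domains with ITMT taken into account.
 */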

/**
 * sched_clear_itmt_support() - Revoke platform's support of ITMT
 *
 * This function is used by the OS to indicate that it has
 * revoked the platform's support of ITMT feature.
 *
 * It must not be called with the CPU hotplug lock held, as that
 * lock has to be acquired later to rebuild the sched domains.
 */
void sched_clear_itmt_support(void)
{
	mutex_lock(&itmt_update_mutex);

	if (!sched_itmt_capable) {
		mutex_unlock(&itmt_update_mutex);
		return;
	}
	sched_itmt_capable = false;

	if (itmt_sysctl_header) {
		unregister_sysctl_table(itmt_sysctl_header);
		itmt_sysctl_header = NULL;
	}

	if (sysctl_sched_itmt_enabled) {
		/* disable sched_itmt if we are no longer ITMT capable */
		sysctl_sched_itmt_enabled = 0;
		x86_topology_update = true;
		rebuild_sched_domains();
	}

	mutex_unlock(&itmt_update_mutex);
}

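/*
 * arch_asym_cpu_priority() - Report the ITMT priority of a CPU
 * @cpu: CPU number
 *
 * Overrides the scheduler's default (weak) implementation so that
 * asym-packing decisions prefer CPUs with a higher priority, as set
 * by sched_set_itmt_core_prio() below.
 */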
int arch_asym_cpu_priority(int cpu)
{
	return per_cpu(sched_core_priority, cpu);
}

/**
 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
 * @prio: Priority of cpu core
 * @core_cpu: The cpu number associated with the core
 *
 * The pstate driver will find out the max boost frequency
 * and call this function to set a priority proportional
 * to the max boost frequency. CPUs with higher boost
 * frequencies will receive higher priorities.
 *
 * No need to rebuild sched domain after updating
 * the CPU priorities. The sched domains have no
 * dependency on CPU priorities.
 */
void sched_set_itmt_core_prio(int prio, int core_cpu)
{
	int cpu, i = 1;

	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
		int smt_prio;

		/*
		 * Ensure that the siblings are moved to the end
		 * of the priority chain and only used when
		 * all other high priority cpus are out of capacity.
		 */
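		/*
		 * For example, with prio == 100 on a core with two SMT
		 * siblings (smp_num_siblings == 2), the first sibling
		 * gets priority 200 and the second gets 100, so the
		 * higher priority primary threads are consumed before
		 * their siblings.
		 */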
		smt_prio = prio * smp_num_siblings / i;
		per_cpu(sched_core_priority, cpu) = smt_prio;
		i++;
	}
}