// SPDX-License-Identifier: GPL-2.0
/*
 * kernel/sched/loadavg.c
 *
 * This file contains the magic bits required to compute the global loadavg
 * figure. It's a silly number but people think it's important. We go to
 * great lengths to make it work on big machines and tickless kernels.
 */
#include "sched.h"

/*
 * Global load-average calculations
 *
 * We take a distributed and async approach to calculating the global load-avg
 * in order to minimize overhead.
 *
 * The global load average is an exponentially decaying average of nr_running +
 * nr_uninterruptible.
 *
 * Once every LOAD_FREQ:
 *
 *	nr_active = 0;
 *	for_each_possible_cpu(cpu)
 *		nr_active += cpu_of(cpu)->nr_running + cpu_of(cpu)->nr_uninterruptible;
 *
 *	avenrun[n] = avenrun[n] * exp_n + nr_active * (1 - exp_n)
 *
 * Due to a number of reasons the above turns into the mess below:
 *
 *  - for_each_possible_cpu() is prohibitively expensive on machines with
 *    a serious number of CPUs, therefore we need to take a distributed
 *    approach to calculating nr_active.
 *
 *	\Sum_i x_i(t) = \Sum_i x_i(t) - x_i(t_0) | x_i(t_0) := 0
 *		      = \Sum_i { \Sum_j=1 x_i(t_j) - x_i(t_j-1) }
 *
 *    So assuming nr_active := 0 when we start out -- true by definition, we
 *    can simply take per-CPU deltas and fold those into a global accumulate
 *    to obtain the same result. See calc_load_fold_active().
 *
 *    Furthermore, in order to avoid synchronizing all per-CPU delta folding
 *    across the machine, we assume 10 ticks is sufficient time for every
 *    CPU to have completed this task.
 *
 *    This places an upper-bound on the IRQ-off latency of the machine. Then
 *    again, being late doesn't lose the delta, it just wrecks the sample.
 *
 *  - cpu_rq()->nr_uninterruptible isn't accurately tracked per-CPU because
 *    this would add another cross-CPU cacheline miss and atomic operation
 *    to the wakeup path. Instead we increment on whatever CPU the task ran
 *    when it went into uninterruptible state and decrement on whatever CPU
 *    did the wakeup. This means that only the sum of nr_uninterruptible over
 *    all CPUs yields the correct result.
 *
 * This covers the NO_HZ=n code; for extra headaches, see the comment below.
 */
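
/*
 * Illustrative sketch (not actual kernel code) of the folding identity
 * above: if, within one window, CPU0 goes from 3 to 5 active tasks and
 * CPU1 from 2 to 0, their ticks fold deltas of +2 and -2 respectively:
 *
 *	atomic_long_add(+2, &calc_load_tasks);	// CPU0's tick
 *	atomic_long_add(-2, &calc_load_tasks);	// CPU1's tick
 *
 * and the global sum moves by exactly the change in \Sum_i x_i(t),
 * without any CPU ever touching another CPU's runqueue.
 */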

/* Variables and functions for calc_load */
atomic_long_t calc_load_tasks;
unsigned long calc_load_update;
unsigned long avenrun[3];
EXPORT_SYMBOL(avenrun); /* should be removed */

/**
 * get_avenrun - get the load average array
 * @loads:	pointer to dest load array
 * @offset:	offset to add
 * @shift:	shift count to shift the result left
 *
 * These values are estimates at best, so no need for locking.
 */
void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
{
	loads[0] = (avenrun[0] + offset) << shift;
	loads[1] = (avenrun[1] + offset) << shift;
	loads[2] = (avenrun[2] + offset) << shift;
}
EXPORT_SYMBOL_GPL(get_avenrun);
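
/*
 * Example consumer (a sketch after fs/proc/loadavg.c; names come from
 * that file, not defined here):
 *
 *	unsigned long avnrun[3];
 *
 *	get_avenrun(avnrun, FIXED_1/200, 0);
 *	seq_printf(m, "%lu.%02lu %lu.%02lu %lu.%02lu ...\n",
 *		   LOAD_INT(avnrun[0]), LOAD_FRAC(avnrun[0]),
 *		   LOAD_INT(avnrun[1]), LOAD_FRAC(avnrun[1]),
 *		   LOAD_INT(avnrun[2]), LOAD_FRAC(avnrun[2]));
 *
 * The FIXED_1/200 offset rounds to the nearest 1/100th before
 * LOAD_INT()/LOAD_FRAC() (<linux/sched/loadavg.h>) split the fixed-point
 * value into its integer and two-digit fractional parts.
 */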
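/*
 * Fold this runqueue's change in active count (nr_running +
 * nr_uninterruptible, less @adjust for tasks the caller knows should not
 * be counted) into a delta against the value folded last time, and record
 * the new baseline in ->calc_load_active.
 */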
long calc_load_fold_active(struct rq *this_rq, long adjust)
{
	long nr_active, delta = 0;

	nr_active = this_rq->nr_running - adjust;
	nr_active += (long)this_rq->nr_uninterruptible;

	if (nr_active != this_rq->calc_load_active) {
		delta = nr_active - this_rq->calc_load_active;
		this_rq->calc_load_active = nr_active;
	}

	return delta;
}

/**
 * fixed_power_int - compute: x^n, in O(log n) time
 *
 * @x:         base of the power
 * @frac_bits: fractional bits of @x
 * @n:         power to raise @x to.
 *
 * By exploiting the relation between the definition of the natural power
 * function: x^n := x*x*...*x (x multiplied by itself n times), and the
 * binary encoding of numbers used by computers: n := \Sum n_i * 2^i
 * (where n_i \elem {0, 1} is the binary vector representing n), we find:
 * x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is of course
 * trivially computable in O(log_2 n), the length of our binary vector.
 */
static unsigned long
fixed_power_int(unsigned long x, unsigned int frac_bits, unsigned int n)
{
	unsigned long result = 1UL << frac_bits;

	if (n) {
		for (;;) {
			if (n & 1) {
				result *= x;
				result += 1UL << (frac_bits - 1);
				result >>= frac_bits;
			}
			n >>= 1;
			if (!n)
				break;
			x *= x;
			x += 1UL << (frac_bits - 1);
			x >>= frac_bits;
		}
	}

	return result;
}
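
/*
 * Worked example (illustrative): with frac_bits = FSHIFT = 11, i.e.
 * 1.0 == 2048, squaring the 1-minute decay factor EXP_1 = 1884 gives:
 *
 *	fixed_power_int(1884, 11, 2) == (1884*1884 + 1024) >> 11 == 1733
 *
 * that is, (1884/2048)^2 == 0.9200^2 ~= 0.8463 ~= 1733/2048. For larger
 * n only ~log_2(n) such rounding multiplications are needed.
 */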

/*
 * a1 = a0 * e + a * (1 - e)
 *
 * a2 = a1 * e + a * (1 - e)
 *    = (a0 * e + a * (1 - e)) * e + a * (1 - e)
 *    = a0 * e^2 + a * (1 - e) * (1 + e)
 *
 * a3 = a2 * e + a * (1 - e)
 *    = (a0 * e^2 + a * (1 - e) * (1 + e)) * e + a * (1 - e)
 *    = a0 * e^3 + a * (1 - e) * (1 + e + e^2)
 *
 *  ...
 *
 * an = a0 * e^n + a * (1 - e) * (1 + e + ... + e^(n-1))	[1]
 *    = a0 * e^n + a * (1 - e) * (1 - e^n)/(1 - e)
 *    = a0 * e^n + a * (1 - e^n)
 *
 * [1] application of the geometric series:
 *
 *              n         1 - x^(n+1)
 *   S_n := \Sum x^i = -------------
 *             i=0         1 - x
 */
unsigned long
calc_load_n(unsigned long load, unsigned long exp,
	    unsigned long active, unsigned int n)
{
	return calc_load(load, fixed_power_int(exp, FSHIFT, n), active);
}
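
/*
 * So catching up, e.g., n = 3 missed windows in one step (illustrative):
 *
 *	load = calc_load_n(load, EXP_1, active, 3);
 *
 * yields the same result as three successive
 *
 *	load = calc_load(load, EXP_1, active);
 *
 * applications with an unchanged active count, modulo fixed-point
 * rounding, since e^3 folds the three decays into one.
 */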

#ifdef CONFIG_NO_HZ_COMMON
/*
 * Handle NO_HZ for the global load-average.
 *
 * Since the above described distributed algorithm to compute the global
 * load-average relies on per-CPU sampling from the tick, it is affected by
 * NO_HZ.
 *
 * The basic idea is to fold the nr_active delta into a global NO_HZ-delta upon
 * entering NO_HZ state such that we can include this as an 'extra' CPU delta
 * when we read the global state.
 *
 * Obviously reality has to ruin such a delightfully simple scheme:
 *
 *  - When we go NO_HZ idle during the window, we can negate our sample
 *    contribution, causing under-accounting.
 *
 *    We avoid this by keeping two NO_HZ-delta counters and flipping them
 *    when the window starts, thus separating old and new NO_HZ load.
 *
 *    The only trick is the slight shift in index flip for read vs write.
 *
 *        0s            5s            10s           15s
 *          +10           +10           +10           +10
 *        |-|-----------|-|-----------|-|-----------|-|
 *    r:0 0 1           1 0           0 1           1 0
 *    w:0 1 1           0 0           1 1           0 0
 *
 *    This ensures we'll fold the old NO_HZ contribution in this window while
 *    accumulating the new one.
 *
 *  - When we wake up from NO_HZ during the window, we push up our
 *    contribution, since we effectively move our sample point to a known
 *    busy state.
 *
 *    This is solved by pushing the window forward, and thus skipping the
 *    sample, for this CPU (effectively using the NO_HZ-delta for this CPU which
 *    was in effect at the time the window opened). This also solves the issue
 *    of having to deal with a CPU having been in NO_HZ for multiple LOAD_FREQ
 *    intervals.
 *
 * When making the ILB scale, we should try to pull this in as well.
 */
static atomic_long_t calc_load_nohz[2];
static int calc_load_idx;
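
/*
 * Illustration of the index dance above (hypothetical timeline): say the
 * window opens at 10s with calc_load_idx == 0. A CPU going idle just
 * before 10s writes its delta into calc_load_nohz[0], the bucket this
 * window will read; one going idle just after 10s observes the expired
 * window, writes into calc_load_nohz[1], and that delta is only consumed
 * once the 15s window opens. New NO_HZ load thus never contaminates the
 * sample currently being read.
 */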

static inline int calc_load_write_idx(void)
{
	int idx = calc_load_idx;

	/*
	 * See calc_global_nohz(); if we observe the new index, we also
	 * need to observe the new update time.
	 */
	smp_rmb();

	/*
	 * If the folding window started, make sure we start writing in the
	 * next NO_HZ-delta.
	 */
	if (!time_before(jiffies, READ_ONCE(calc_load_update)))
		idx++;

	return idx & 1;
}

static inline int calc_load_read_idx(void)
{
	return calc_load_idx & 1;
}

static void calc_load_nohz_fold(struct rq *rq)
{
	long delta;

	delta = calc_load_fold_active(rq, 0);
	if (delta) {
		int idx = calc_load_write_idx();

		atomic_long_add(delta, &calc_load_nohz[idx]);
	}
}

void calc_load_nohz_start(void)
{
	/*
	 * We're going into NO_HZ mode; if there's any pending delta, fold it
	 * into the pending NO_HZ delta.
	 */
	calc_load_nohz_fold(this_rq());
}

/*
 * Keep track of the load for NOHZ_FULL; must be called between
 * calc_load_nohz_{start,stop}().
 */
void calc_load_nohz_remote(struct rq *rq)
{
	calc_load_nohz_fold(rq);
}

void calc_load_nohz_stop(void)
{
	struct rq *this_rq = this_rq();

	/*
	 * If we're still before the pending sample window, we're done.
	 */
	this_rq->calc_load_update = READ_ONCE(calc_load_update);
	if (time_before(jiffies, this_rq->calc_load_update))
		return;

	/*
	 * We woke inside or after the sample window, which means we're
	 * already accounted through the nohz accounting, so skip the entire
	 * deal and sync up for the next window.
	 */
	if (time_before(jiffies, this_rq->calc_load_update + 10))
		this_rq->calc_load_update += LOAD_FREQ;
}

static long calc_load_nohz_read(void)
{
	int idx = calc_load_read_idx();
	long delta = 0;

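	/*
	 * Peek before the xchg: the plain read avoids dirtying the shared
	 * cacheline with an atomic RMW when no delta is pending, which is
	 * the common case.
	 */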
	if (atomic_long_read(&calc_load_nohz[idx]))
		delta = atomic_long_xchg(&calc_load_nohz[idx], 0);

	return delta;
}

/*
 * NO_HZ can leave us missing all per-CPU ticks calling
 * calc_load_fold_active(), but since a NO_HZ CPU folds its delta into
 * calc_load_nohz per calc_load_nohz_start(), all we need to do is fold
 * in the pending NO_HZ delta if our NO_HZ period crossed a load cycle boundary.
 *
 * Once we've updated the global active value, we need to apply the exponential
 * weights adjusted to the number of cycles missed.
 */
static void calc_global_nohz(void)
{
	unsigned long sample_window;
	long delta, active, n;

	sample_window = READ_ONCE(calc_load_update);
	if (!time_before(jiffies, sample_window + 10)) {
		/*
		 * Catch up, folding however many windows we are still behind.
		 */
		delta = jiffies - sample_window - 10;
		n = 1 + (delta / LOAD_FREQ);

		active = atomic_long_read(&calc_load_tasks);
		active = active > 0 ? active * FIXED_1 : 0;

		avenrun[0] = calc_load_n(avenrun[0], EXP_1, active, n);
		avenrun[1] = calc_load_n(avenrun[1], EXP_5, active, n);
		avenrun[2] = calc_load_n(avenrun[2], EXP_15, active, n);

		WRITE_ONCE(calc_load_update, sample_window + n * LOAD_FREQ);
	}

	/*
	 * Flip the NO_HZ index...
	 *
	 * Make sure we first write the new time and then flip the index, so
	 * that calc_load_write_idx() will see the new time when it reads the
	 * new index; this avoids a double flip messing things up.
	 */
	smp_wmb();
	calc_load_idx++;
}
#else /* !CONFIG_NO_HZ_COMMON */

static inline long calc_load_nohz_read(void) { return 0; }
static inline void calc_global_nohz(void) { }

#endif /* CONFIG_NO_HZ_COMMON */

/*
 * calc_global_load - update the avenrun load estimates 10 ticks after the
 * CPUs have updated calc_load_tasks.
 *
 * Called from the global timer code.
 */
void calc_global_load(void)
{
	unsigned long sample_window;
	long active, delta;

	sample_window = READ_ONCE(calc_load_update);
	if (time_before(jiffies, sample_window + 10))
		return;

	/*
	 * Fold the 'old' NO_HZ-delta to include all NO_HZ CPUs.
	 */
	delta = calc_load_nohz_read();
	if (delta)
		atomic_long_add(delta, &calc_load_tasks);

	active = atomic_long_read(&calc_load_tasks);
	active = active > 0 ? active * FIXED_1 : 0;

	avenrun[0] = calc_load(avenrun[0], EXP_1, active);
	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
	avenrun[2] = calc_load(avenrun[2], EXP_15, active);

	WRITE_ONCE(calc_load_update, sample_window + LOAD_FREQ);

	/*
	 * In case we went into NO_HZ for multiple LOAD_FREQ intervals,
	 * catch up in bulk.
	 */
	calc_global_nohz();
}

/*
 * Called from scheduler_tick() to periodically update this CPU's
 * active count.
 */
void calc_global_load_tick(struct rq *this_rq)
{
	long delta;

	if (time_before(jiffies, this_rq->calc_load_update))
		return;

	delta = calc_load_fold_active(this_rq, 0);
	if (delta)
		atomic_long_add(delta, &calc_load_tasks);

	this_rq->calc_load_update += LOAD_FREQ;
}