// SPDX-License-Identifier: GPL-2.0
/*
 * Completely Fair Scheduling (CFS) Class (SCHED_NORMAL/SCHED_BATCH)
 *
 * Copyright (C) 2007 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
 *
 * Interactivity improvements by Mike Galbraith
 * (C) 2007 Mike Galbraith <efault@gmx.de>
 *
 * Various enhancements by Dmitry Adamushko.
 * (C) 2007 Dmitry Adamushko <dmitry.adamushko@gmail.com>
 *
 * Group scheduling enhancements by Srivatsa Vaddagiri
 * Copyright IBM Corporation, 2007
 * Author: Srivatsa Vaddagiri <vatsa@linux.vnet.ibm.com>
 *
 * Scaled math optimizations by Thomas Gleixner
 * Copyright (C) 2007, Thomas Gleixner <tglx@linutronix.de>
 *
 * Adaptive scheduling granularity, math enhancements by Peter Zijlstra
 * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra
 */
#include "sched.h"

#include <trace/hooks/sched.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(sched_stat_runtime);

/*
 * Targeted preemption latency for CPU-bound tasks:
 *
 * NOTE: this latency value is not the same as the concept of
 * 'timeslice length' - timeslices in CFS are of variable length
 * and have no persistent notion like in traditional, time-slice
 * based scheduling concepts.
 *
 * (to see the precise effective timeslice length of your workload,
 *  run vmstat and monitor the context-switches (cs) field)
 *
 * (default: 6ms * (1 + ilog(ncpus)), units: nanoseconds)
 */
unsigned int sysctl_sched_latency = 6000000ULL;
EXPORT_SYMBOL_GPL(sysctl_sched_latency);
static unsigned int normalized_sysctl_sched_latency = 6000000ULL;

/*
 * The initial- and re-scaling of tunables is configurable
 *
 * Options are:
 *
 * SCHED_TUNABLESCALING_NONE - unscaled, always *1
 * SCHED_TUNABLESCALING_LOG - scaled logarithmically, *1+ilog(ncpus)
 * SCHED_TUNABLESCALING_LINEAR - scaled linearly, *ncpus
 *
 * (default SCHED_TUNABLESCALING_LOG = *(1+ilog(ncpus)))
 */
enum sched_tunable_scaling sysctl_sched_tunable_scaling = SCHED_TUNABLESCALING_LOG;

/*
 * Minimal preemption granularity for CPU-bound tasks:
 *
 * (default: 0.75 msec * (1 + ilog(ncpus)), units: nanoseconds)
 */
unsigned int sysctl_sched_min_granularity = 750000ULL;
EXPORT_SYMBOL_GPL(sysctl_sched_min_granularity);
static unsigned int normalized_sysctl_sched_min_granularity = 750000ULL;

/*
 * This value is kept at sysctl_sched_latency/sysctl_sched_min_granularity
 */
static unsigned int sched_nr_latency = 8;

/*
 * After fork, child runs first. If set to 0 (default) then
 * parent will (try to) run first.
 */
unsigned int sysctl_sched_child_runs_first __read_mostly;

/*
 * SCHED_OTHER wake-up granularity.
 *
 * This option delays the preemption effects of decoupled workloads
 * and reduces their over-scheduling. Synchronous workloads will still
 * have immediate wakeup/sleep latencies.
 *
 * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds)
 */
unsigned int sysctl_sched_wakeup_granularity = 1000000UL;
static unsigned int normalized_sysctl_sched_wakeup_granularity = 1000000UL;

const_debug unsigned int sysctl_sched_migration_cost = 500000UL;

int sched_thermal_decay_shift;
static int __init setup_sched_thermal_decay_shift(char *str)
{
	int _shift = 0;

	if (kstrtoint(str, 0, &_shift))
		pr_warn("Unable to set scheduler thermal pressure decay shift parameter\n");

	sched_thermal_decay_shift = clamp(_shift, 0, 10);
	return 1;
}
__setup("sched_thermal_decay_shift=", setup_sched_thermal_decay_shift);
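
/*
 * Illustrative note (added; not part of the original comment): the shift
 * slows down the thermal pressure PELT signal by right-shifting the clock
 * it is tracked with, so e.g. booting with "sched_thermal_decay_shift=3"
 * should make thermal pressure decay roughly 2^3 = 8 times more slowly
 * than the other PELT signals.
 */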

#ifdef CONFIG_SMP
/*
 * For asym packing, by default the lower numbered CPU has higher priority.
 */
int __weak arch_asym_cpu_priority(int cpu)
{
	return -cpu;
}

/*
 * The margin used when comparing utilization with CPU capacity.
 *
 * (default: ~20%)
 */
#define fits_capacity(cap, max) ((cap) * 1280 < (max) * 1024)
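
/*
 * Worked example (added for illustration): with max == 1024,
 * fits_capacity(819, 1024) is true (819 * 1280 = 1048320 < 1048576) while
 * fits_capacity(820, 1024) is false, i.e. utilization has to stay below
 * roughly 80% of the capacity to "fit".
 */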

#endif

#ifdef CONFIG_CFS_BANDWIDTH
/*
 * Amount of runtime to allocate from global (tg) to local (per-cfs_rq) pool
 * each time a cfs_rq requests quota.
 *
 * Note: in the case that the slice exceeds the runtime remaining (either due
 * to consumption or the quota being specified to be smaller than the slice)
 * we will always only issue the remaining available time.
 *
 * (default: 5 msec, units: microseconds)
 */
unsigned int sysctl_sched_cfs_bandwidth_slice = 5000UL;
#endif

static inline void update_load_add(struct load_weight *lw, unsigned long inc)
{
	lw->weight += inc;
	lw->inv_weight = 0;
}

static inline void update_load_sub(struct load_weight *lw, unsigned long dec)
{
	lw->weight -= dec;
	lw->inv_weight = 0;
}

static inline void update_load_set(struct load_weight *lw, unsigned long w)
{
	lw->weight = w;
	lw->inv_weight = 0;
}

/*
 * Increase the granularity value when there are more CPUs,
 * because with more CPUs the 'effective latency' as visible
 * to users decreases. But the relationship is not linear,
 * so pick a second-best guess by going with the log2 of the
 * number of CPUs.
 *
 * This idea comes from the SD scheduler of Con Kolivas:
 */
static unsigned int get_update_sysctl_factor(void)
{
	unsigned int cpus = min_t(unsigned int, num_online_cpus(), 8);
	unsigned int factor;

	switch (sysctl_sched_tunable_scaling) {
	case SCHED_TUNABLESCALING_NONE:
		factor = 1;
		break;
	case SCHED_TUNABLESCALING_LINEAR:
		factor = cpus;
		break;
	case SCHED_TUNABLESCALING_LOG:
	default:
		factor = 1 + ilog2(cpus);
		break;
	}

	return factor;
}
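
/*
 * Worked example (added for illustration): cpus is capped at 8, so with the
 * default LOG scaling the factor is at most 1 + ilog2(8) = 4. On such a
 * machine update_sysctl() below yields sched_latency = 4 * 6ms = 24ms and
 * sched_min_granularity = 4 * 0.75ms = 3ms.
 */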

static void update_sysctl(void)
{
	unsigned int factor = get_update_sysctl_factor();

#define SET_SYSCTL(name) \
	(sysctl_##name = (factor) * normalized_sysctl_##name)
	SET_SYSCTL(sched_min_granularity);
	SET_SYSCTL(sched_latency);
	SET_SYSCTL(sched_wakeup_granularity);
#undef SET_SYSCTL
}

void __init sched_init_granularity(void)
{
	update_sysctl();
}

#define WMULT_CONST (~0U)
#define WMULT_SHIFT 32

static void __update_inv_weight(struct load_weight *lw)
{
	unsigned long w;

	if (likely(lw->inv_weight))
		return;

	w = scale_load_down(lw->weight);

	if (BITS_PER_LONG > 32 && unlikely(w >= WMULT_CONST))
		lw->inv_weight = 1;
	else if (unlikely(!w))
		lw->inv_weight = WMULT_CONST;
	else
		lw->inv_weight = WMULT_CONST / w;
}
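
/*
 * Worked example (added for illustration): for a nice-0 entity,
 * scale_load_down(lw->weight) is 1024, so inv_weight becomes
 * 0xffffffff / 1024 = 4194303, i.e. roughly 2^32 / weight. This lets
 * __calc_delta() below replace a division with a multiply and a shift.
 */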

/*
 * delta_exec * weight / lw.weight
 *   OR
 * (delta_exec * (weight * lw->inv_weight)) >> WMULT_SHIFT
 *
 * Either weight := NICE_0_LOAD and lw \e sched_prio_to_wmult[], in which case
 * we're guaranteed shift stays positive because inv_weight is guaranteed to
 * fit 32 bits, and NICE_0_LOAD gives another 10 bits; therefore shift >= 22.
 *
 * Or, weight <= lw.weight (because lw.weight is the runqueue weight), thus
 * weight/lw.weight <= 1, and therefore our shift will also be positive.
 */
static u64 __calc_delta(u64 delta_exec, unsigned long weight, struct load_weight *lw)
{
	u64 fact = scale_load_down(weight);
	int shift = WMULT_SHIFT;

	__update_inv_weight(lw);

	if (unlikely(fact >> 32)) {
		while (fact >> 32) {
			fact >>= 1;
			shift--;
		}
	}

	fact = mul_u32_u32(fact, lw->inv_weight);

	while (fact >> 32) {
		fact >>= 1;
		shift--;
	}

	return mul_u64_u32_shr(delta_exec, fact, shift);
}
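
/*
 * Worked example (added for illustration): with delta_exec = 1000000 (1ms),
 * weight = NICE_0_LOAD and lw->weight = 2 * NICE_0_LOAD (two nice-0 entities
 * queued), the result is ~500000, i.e. delta_exec * weight / lw.weight as
 * stated in the comment above.
 */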


const struct sched_class fair_sched_class;

/**************************************************************
 * CFS operations on generic schedulable entities:
 */

#ifdef CONFIG_FAIR_GROUP_SCHED
static inline struct task_struct *task_of(struct sched_entity *se)
{
	SCHED_WARN_ON(!entity_is_task(se));
	return container_of(se, struct task_struct, se);
}

/* Walk up scheduling entities hierarchy */
#define for_each_sched_entity(se) \
		for (; se; se = se->parent)

static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
{
	return p->se.cfs_rq;
}

/* runqueue on which this entity is (to be) queued */
static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
	return se->cfs_rq;
}

/* runqueue "owned" by this group */
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
	return grp->my_q;
}

static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
{
	if (!path)
		return;

	if (cfs_rq && task_group_is_autogroup(cfs_rq->tg))
		autogroup_path(cfs_rq->tg, path, len);
	else if (cfs_rq && cfs_rq->tg->css.cgroup)
		cgroup_path(cfs_rq->tg->css.cgroup, path, len);
	else
		strlcpy(path, "(null)", len);
}

static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
	struct rq *rq = rq_of(cfs_rq);
	int cpu = cpu_of(rq);

	if (cfs_rq->on_list)
		return rq->tmp_alone_branch == &rq->leaf_cfs_rq_list;

	cfs_rq->on_list = 1;

	/*
	 * Ensure we either appear before our parent (if already
	 * enqueued) or force our parent to appear after us when it is
	 * enqueued. The fact that we always enqueue bottom-up
	 * reduces this to two cases and a special case for the root
	 * cfs_rq. Furthermore, it also means that we will always reset
	 * tmp_alone_branch either when the branch is connected
	 * to a tree or when we reach the top of the tree
	 */
	if (cfs_rq->tg->parent &&
	    cfs_rq->tg->parent->cfs_rq[cpu]->on_list) {
		/*
		 * If parent is already on the list, we add the child
		 * just before it. Thanks to the circular linked property of
		 * the list, this means putting the child at the tail
		 * of the list that starts with the parent.
		 */
		list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
			&(cfs_rq->tg->parent->cfs_rq[cpu]->leaf_cfs_rq_list));
		/*
		 * The branch is now connected to its tree so we can
		 * reset tmp_alone_branch to the beginning of the
		 * list.
		 */
		rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
		return true;
	}

	if (!cfs_rq->tg->parent) {
		/*
		 * A cfs_rq without a parent should be put
		 * at the tail of the list.
		 */
		list_add_tail_rcu(&cfs_rq->leaf_cfs_rq_list,
			&rq->leaf_cfs_rq_list);
		/*
		 * We have reached the top of a tree so we can reset
		 * tmp_alone_branch to the beginning of the list.
		 */
		rq->tmp_alone_branch = &rq->leaf_cfs_rq_list;
		return true;
	}

	/*
	 * The parent has not already been added so we want to
	 * make sure that it will be put after us.
	 * tmp_alone_branch points to the beginning of the branch
	 * where we will add the parent.
	 */
	list_add_rcu(&cfs_rq->leaf_cfs_rq_list, rq->tmp_alone_branch);
	/*
	 * update tmp_alone_branch to point to the new beginning
	 * of the branch
	 */
	rq->tmp_alone_branch = &cfs_rq->leaf_cfs_rq_list;
	return false;
}

static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
	if (cfs_rq->on_list) {
		struct rq *rq = rq_of(cfs_rq);

		/*
		 * With cfs_rq being unthrottled/throttled during an enqueue,
		 * it can happen that tmp_alone_branch points to the leaf that
		 * we finally want to delete. In this case, tmp_alone_branch moves
		 * to the prev element but it will point to rq->leaf_cfs_rq_list
		 * at the end of the enqueue.
		 */
		if (rq->tmp_alone_branch == &cfs_rq->leaf_cfs_rq_list)
			rq->tmp_alone_branch = cfs_rq->leaf_cfs_rq_list.prev;

		list_del_rcu(&cfs_rq->leaf_cfs_rq_list);
		cfs_rq->on_list = 0;
	}
}

static inline void assert_list_leaf_cfs_rq(struct rq *rq)
{
	SCHED_WARN_ON(rq->tmp_alone_branch != &rq->leaf_cfs_rq_list);
}

/* Iterate through all leaf cfs_rq's on a runqueue */
#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \
	list_for_each_entry_safe(cfs_rq, pos, &rq->leaf_cfs_rq_list, \
				 leaf_cfs_rq_list)

/* Do the two (enqueued) entities belong to the same group ? */
static inline struct cfs_rq *
is_same_group(struct sched_entity *se, struct sched_entity *pse)
{
	if (se->cfs_rq == pse->cfs_rq)
		return se->cfs_rq;

	return NULL;
}

static inline struct sched_entity *parent_entity(struct sched_entity *se)
{
	return se->parent;
}

static void
find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
	int se_depth, pse_depth;

	/*
	 * A preemption test can be made between sibling entities that are in the
	 * same cfs_rq, i.e. that have a common parent. Walk up the hierarchy of
	 * both tasks until we find their ancestors that are siblings of a common
	 * parent.
	 */

	/* First walk up until both entities are at same depth */
	se_depth = (*se)->depth;
	pse_depth = (*pse)->depth;

	while (se_depth > pse_depth) {
		se_depth--;
		*se = parent_entity(*se);
	}

	while (pse_depth > se_depth) {
		pse_depth--;
		*pse = parent_entity(*pse);
	}

	while (!is_same_group(*se, *pse)) {
		*se = parent_entity(*se);
		*pse = parent_entity(*pse);
	}
}
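
/*
 * Illustrative example (added; not in the original source): if *se is a task
 * in group /A/B (depth 2) and *pse a task in group /C (depth 1), *se is first
 * walked up to the /A/B group entity (depth 1), then both are walked up once
 * more so that the comparison is made between the /A and /C entities, which
 * share the root cfs_rq.
 */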

#else /* !CONFIG_FAIR_GROUP_SCHED */

static inline struct task_struct *task_of(struct sched_entity *se)
{
	return container_of(se, struct task_struct, se);
}

#define for_each_sched_entity(se) \
		for (; se; se = NULL)

static inline struct cfs_rq *task_cfs_rq(struct task_struct *p)
{
	return &task_rq(p)->cfs;
}

static inline struct cfs_rq *cfs_rq_of(struct sched_entity *se)
{
	struct task_struct *p = task_of(se);
	struct rq *rq = task_rq(p);

	return &rq->cfs;
}

/* runqueue "owned" by this group */
static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
{
	return NULL;
}

static inline void cfs_rq_tg_path(struct cfs_rq *cfs_rq, char *path, int len)
{
	if (path)
		strlcpy(path, "(null)", len);
}

static inline bool list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
	return true;
}

static inline void list_del_leaf_cfs_rq(struct cfs_rq *cfs_rq)
{
}

static inline void assert_list_leaf_cfs_rq(struct rq *rq)
{
}

#define for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) \
		for (cfs_rq = &rq->cfs, pos = NULL; cfs_rq; cfs_rq = pos)

static inline struct sched_entity *parent_entity(struct sched_entity *se)
{
	return NULL;
}

static inline void
find_matching_se(struct sched_entity **se, struct sched_entity **pse)
{
}

#endif /* CONFIG_FAIR_GROUP_SCHED */

static __always_inline
void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec);

/**************************************************************
 * Scheduling class tree data structure manipulation methods:
 */

static inline u64 max_vruntime(u64 max_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - max_vruntime);
	if (delta > 0)
		max_vruntime = vruntime;

	return max_vruntime;
}

static inline u64 min_vruntime(u64 min_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - min_vruntime);
	if (delta < 0)
		min_vruntime = vruntime;

	return min_vruntime;
}
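
/*
 * Worked example (added for illustration): the (s64) cast keeps the
 * comparison correct across u64 wraparound. E.g. max_vruntime(ULLONG_MAX - 10, 5)
 * sees delta = (s64)(5 - (ULLONG_MAX - 10)) = 16 > 0 and correctly treats the
 * wrapped value 5 as the larger vruntime.
 */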

static inline int entity_before(struct sched_entity *a,
				struct sched_entity *b)
{
	return (s64)(a->vruntime - b->vruntime) < 0;
}

static void update_min_vruntime(struct cfs_rq *cfs_rq)
{
	struct sched_entity *curr = cfs_rq->curr;
	struct rb_node *leftmost = rb_first_cached(&cfs_rq->tasks_timeline);

	u64 vruntime = cfs_rq->min_vruntime;

	if (curr) {
		if (curr->on_rq)
			vruntime = curr->vruntime;
		else
			curr = NULL;
	}

	if (leftmost) { /* non-empty tree */
		struct sched_entity *se;
		se = rb_entry(leftmost, struct sched_entity, run_node);

		if (!curr)
			vruntime = se->vruntime;
		else
			vruntime = min_vruntime(vruntime, se->vruntime);
	}

	/* ensure we never gain time by being placed backwards. */
	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
#ifndef CONFIG_64BIT
	smp_wmb();
	cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
#endif
}

/*
 * Enqueue an entity into the rb-tree:
 */
static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	struct rb_node **link = &cfs_rq->tasks_timeline.rb_root.rb_node;
	struct rb_node *parent = NULL;
	struct sched_entity *entry;
	bool leftmost = true;

	trace_android_rvh_enqueue_entity(cfs_rq, se);
	/*
	 * Find the right place in the rbtree:
	 */
	while (*link) {
		parent = *link;
		entry = rb_entry(parent, struct sched_entity, run_node);
		/*
		 * We don't care about collisions. Nodes with
		 * the same key stay together.
		 */
		if (entity_before(se, entry)) {
			link = &parent->rb_left;
		} else {
			link = &parent->rb_right;
			leftmost = false;
		}
	}

	rb_link_node(&se->run_node, parent, link);
	rb_insert_color_cached(&se->run_node,
			       &cfs_rq->tasks_timeline, leftmost);
}

static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	trace_android_rvh_dequeue_entity(cfs_rq, se);
	rb_erase_cached(&se->run_node, &cfs_rq->tasks_timeline);
}

struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq)
{
	struct rb_node *left = rb_first_cached(&cfs_rq->tasks_timeline);

	if (!left)
		return NULL;

	return rb_entry(left, struct sched_entity, run_node);
}

static struct sched_entity *__pick_next_entity(struct sched_entity *se)
{
	struct rb_node *next = rb_next(&se->run_node);

	if (!next)
		return NULL;

	return rb_entry(next, struct sched_entity, run_node);
}

#ifdef CONFIG_SCHED_DEBUG
struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
{
	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline.rb_root);

	if (!last)
		return NULL;

	return rb_entry(last, struct sched_entity, run_node);
}

/**************************************************************
 * Scheduling class statistics methods:
 */

int sched_proc_update_handler(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	unsigned int factor = get_update_sysctl_factor();

	if (ret || !write)
		return ret;

	sched_nr_latency = DIV_ROUND_UP(sysctl_sched_latency,
					sysctl_sched_min_granularity);

#define WRT_SYSCTL(name) \
	(normalized_sysctl_##name = sysctl_##name / (factor))
	WRT_SYSCTL(sched_min_granularity);
	WRT_SYSCTL(sched_latency);
	WRT_SYSCTL(sched_wakeup_granularity);
#undef WRT_SYSCTL

	return 0;
}
#endif
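
/*
 * Worked example (added for illustration): on a machine where the scaling
 * factor is 4 (8 or more CPUs with the default LOG scaling), writing
 * 12000000 to sched_latency stores a normalized baseline of 3000000, so a
 * subsequent update_sysctl() rescales from 3ms rather than from the
 * already-scaled value.
 */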

/*
 * delta /= w
 */
static inline u64 calc_delta_fair(u64 delta, struct sched_entity *se)
{
	if (unlikely(se->load.weight != NICE_0_LOAD))
		delta = __calc_delta(delta, NICE_0_LOAD, &se->load);

	return delta;
}
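
/*
 * Worked example (added for illustration): a nice +5 task has a weight of
 * 335, so 1ms of execution becomes roughly 1ms * 1024 / 335 ~= 3.06ms of
 * vruntime; it accrues virtual time about three times as fast as a nice-0
 * task of weight 1024.
 */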

/*
 * The idea is to set a period in which each task runs once.
 *
 * When there are too many tasks (sched_nr_latency) we have to stretch
 * this period because otherwise the slices get too small.
 *
 * p = (nr <= nl) ? l : l*nr/nl
 */
static u64 __sched_period(unsigned long nr_running)
{
	if (unlikely(nr_running > sched_nr_latency))
		return nr_running * sysctl_sched_min_granularity;
	else
		return sysctl_sched_latency;
}
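
/*
 * Worked example (added for illustration), using the unscaled defaults of
 * 6ms latency, 0.75ms min granularity and sched_nr_latency == 8: with 4
 * runnable tasks the period is 6ms, with 16 runnable tasks it is stretched
 * to 16 * 0.75ms = 12ms so that each slice stays at least 0.75ms.
 */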

/*
 * We calculate the wall-time slice from the period by taking a part
 * proportional to the weight.
 *
 * s = p*P[w/rw]
 */
static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	unsigned int nr_running = cfs_rq->nr_running;
	u64 slice;

	if (sched_feat(ALT_PERIOD))
		nr_running = rq_of(cfs_rq)->cfs.h_nr_running;

	slice = __sched_period(nr_running + !se->on_rq);

	for_each_sched_entity(se) {
		struct load_weight *load;
		struct load_weight lw;

		cfs_rq = cfs_rq_of(se);
		load = &cfs_rq->load;

		if (unlikely(!se->on_rq)) {
			lw = cfs_rq->load;

			update_load_add(&lw, se->load.weight);
			load = &lw;
		}
		slice = __calc_delta(slice, se->load.weight, load);
	}

	if (sched_feat(BASE_SLICE))
		slice = max(slice, (u64)sysctl_sched_min_granularity);

	return slice;
}
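
/*
 * Worked example (added for illustration): two nice-0 tasks on an otherwise
 * idle CPU give a 6ms period (unscaled defaults) and a slice of
 * 6ms * 1024 / 2048 = 3ms each; a nice +5 task (weight 335) sharing the CPU
 * with a nice-0 task would instead get 6ms * 335 / (1024 + 335) ~= 1.5ms.
 */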

/*
 * We calculate the vruntime slice of a to-be-inserted task.
 *
 * vs = s/w
 */
static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	return calc_delta_fair(sched_slice(cfs_rq, se), se);
}

#include "pelt.h"
#ifdef CONFIG_SMP

static int select_idle_sibling(struct task_struct *p, int prev_cpu, int cpu);
static unsigned long task_h_load(struct task_struct *p);
static unsigned long capacity_of(int cpu);

/* Give a new sched_entity initial runnable values so its load looks heavy while it is still young */
void init_entity_runnable_average(struct sched_entity *se)
{
	struct sched_avg *sa = &se->avg;

	memset(sa, 0, sizeof(*sa));

	/*
	 * Tasks are initialized with full load to be seen as heavy tasks until
	 * they get a chance to stabilize to their real load level.
	 * Group entities are initialized with zero load to reflect the fact that
	 * nothing has been attached to the task group yet.
	 */
	if (entity_is_task(se))
		sa->load_avg = scale_load_down(se->load.weight);

	/* when this task is enqueued, it will contribute to its cfs_rq's load_avg */
}

static void attach_entity_cfs_rq(struct sched_entity *se);

/*
 * With new tasks being created, their initial util_avgs are extrapolated
 * based on the cfs_rq's current util_avg:
 *
 * util_avg = cfs_rq->util_avg / (cfs_rq->load_avg + 1) * se.load.weight
 *
 * However, in many cases, the above util_avg does not give a desired
 * value. Moreover, the sum of the util_avgs may be divergent, such
 * as when the series is a harmonic series.
 *
 * To solve this problem, we also cap the util_avg of successive tasks to
 * only 1/2 of the remaining utilization budget:
 *
 * util_avg_cap = (cpu_scale - cfs_rq->avg.util_avg) / 2^n
 *
 * where n denotes the nth task and cpu_scale the CPU capacity.
 *
 * For example, for a CPU with a capacity of 1024, the simplest series from
 * the beginning would look like:
 *
 *  task  util_avg: 512, 256, 128,  64,  32,   16,    8, ...
 * cfs_rq util_avg: 512, 768, 896, 960, 992, 1008, 1016, ...
 *
 * Finally, that extrapolated util_avg is clamped to the cap (util_avg_cap)
 * if util_avg > util_avg_cap.
 */
void post_init_entity_util_avg(struct task_struct *p)
{
	struct sched_entity *se = &p->se;
	struct cfs_rq *cfs_rq = cfs_rq_of(se);
	struct sched_avg *sa = &se->avg;
	long cpu_scale = arch_scale_cpu_capacity(cpu_of(rq_of(cfs_rq)));
	long cap = (long)(cpu_scale - cfs_rq->avg.util_avg) / 2;

	if (cap > 0) {
		if (cfs_rq->avg.util_avg != 0) {
			sa->util_avg = cfs_rq->avg.util_avg * se->load.weight;
			sa->util_avg /= (cfs_rq->avg.load_avg + 1);

			if (sa->util_avg > cap)
				sa->util_avg = cap;
		} else {
			sa->util_avg = cap;
		}
	}

	sa->runnable_avg = sa->util_avg;

	if (p->sched_class != &fair_sched_class) {
		/*
		 * For !fair tasks do:
		 *
		update_cfs_rq_load_avg(now, cfs_rq);
		attach_entity_load_avg(cfs_rq, se);
		switched_from_fair(rq, p);
		 *
		 * such that the next switched_to_fair() has the
		 * expected state.
		 */
		se->avg.last_update_time = cfs_rq_clock_pelt(cfs_rq);
		return;
	}

	/* Hook before this se's util is attached to cfs_rq's util */
	trace_android_rvh_post_init_entity_util_avg(se);
	attach_entity_cfs_rq(se);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) #else /* !CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) void init_entity_runnable_average(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) void post_init_entity_util_avg(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) static void update_tg_load_avg(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) #endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) * Update the current task's runtime statistics.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) static void update_curr(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) struct sched_entity *curr = cfs_rq->curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) u64 now = rq_clock_task(rq_of(cfs_rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) u64 delta_exec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) if (unlikely(!curr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) delta_exec = now - curr->exec_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) if (unlikely((s64)delta_exec <= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) curr->exec_start = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) schedstat_set(curr->statistics.exec_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) max(delta_exec, curr->statistics.exec_max));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) curr->sum_exec_runtime += delta_exec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) schedstat_add(cfs_rq->exec_clock, delta_exec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882)
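	/*
	 * vruntime advances by delta_exec weighted by the inverse of the
	 * entity's load: calc_delta_fair() scales delta_exec by
	 * NICE_0_LOAD / se->load.weight, so heavier (lower nice) entities
	 * accumulate vruntime more slowly and thus run longer before being
	 * preempted.
	 */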
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) curr->vruntime += calc_delta_fair(delta_exec, curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) update_min_vruntime(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) if (entity_is_task(curr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) struct task_struct *curtask = task_of(curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) trace_sched_stat_runtime(curtask, delta_exec, curr->vruntime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) cgroup_account_cputime(curtask, delta_exec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) account_group_exec_runtime(curtask, delta_exec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) account_cfs_rq_runtime(cfs_rq, delta_exec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) static void update_curr_fair(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) update_curr(cfs_rq_of(&rq->curr->se));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) u64 wait_start, prev_wait_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) if (!schedstat_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) wait_start = rq_clock(rq_of(cfs_rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) prev_wait_start = schedstat_val(se->statistics.wait_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912)
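	/*
	 * For a task in the middle of a migration, wait_start still holds the
	 * wait time accumulated on the previous rq (stored by
	 * update_stats_wait_end() below). Subtract it from the new rq's clock
	 * so that the final wait delta also covers the pre-migration wait.
	 */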
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) if (entity_is_task(se) && task_on_rq_migrating(task_of(se)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) likely(wait_start > prev_wait_start))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) wait_start -= prev_wait_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) __schedstat_set(se->statistics.wait_start, wait_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) u64 delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) if (!schedstat_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) delta = rq_clock(rq_of(cfs_rq)) - schedstat_val(se->statistics.wait_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) if (entity_is_task(se)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) p = task_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) if (task_on_rq_migrating(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) * Preserve a migrating task's wait time so the wait_start
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) * time stamp can be adjusted to accumulate the wait time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) * incurred prior to migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) __schedstat_set(se->statistics.wait_start, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) trace_sched_stat_wait(p, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) __schedstat_set(se->statistics.wait_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) max(schedstat_val(se->statistics.wait_max), delta));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) __schedstat_inc(se->statistics.wait_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) __schedstat_add(se->statistics.wait_sum, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) __schedstat_set(se->statistics.wait_start, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) update_stats_enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) struct task_struct *tsk = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) u64 sleep_start, block_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) if (!schedstat_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) sleep_start = schedstat_val(se->statistics.sleep_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) block_start = schedstat_val(se->statistics.block_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) if (entity_is_task(se))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) tsk = task_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) if (sleep_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) u64 delta = rq_clock(rq_of(cfs_rq)) - sleep_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) if ((s64)delta < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) delta = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (unlikely(delta > schedstat_val(se->statistics.sleep_max)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) __schedstat_set(se->statistics.sleep_max, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) __schedstat_set(se->statistics.sleep_start, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) __schedstat_add(se->statistics.sum_sleep_runtime, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) if (tsk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) account_scheduler_latency(tsk, delta >> 10, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) trace_sched_stat_sleep(tsk, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) if (block_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) u64 delta = rq_clock(rq_of(cfs_rq)) - block_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) if ((s64)delta < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) delta = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (unlikely(delta > schedstat_val(se->statistics.block_max)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) __schedstat_set(se->statistics.block_max, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) __schedstat_set(se->statistics.block_start, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) __schedstat_add(se->statistics.sum_sleep_runtime, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) if (tsk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) if (tsk->in_iowait) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) __schedstat_add(se->statistics.iowait_sum, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) __schedstat_inc(se->statistics.iowait_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) trace_sched_stat_iowait(tsk, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) trace_sched_stat_blocked(tsk, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * Blocking time is in units of nanoseconds, so shift by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * 20 to get a milliseconds-range estimate of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * amount of time that the task spent sleeping:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) */
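			/*
			 * (2^20 ns is roughly 1.05 ms; similarly, the
			 * delta >> 10 passed to account_scheduler_latency()
			 * below is roughly in microseconds.)
			 */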
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) if (unlikely(prof_on == SLEEP_PROFILING)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) profile_hits(SLEEP_PROFILING,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) (void *)get_wchan(tsk),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) delta >> 20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) account_scheduler_latency(tsk, delta >> 10, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) * Task is being enqueued - update stats:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) if (!schedstat_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) * Are we enqueueing a waiting task? (for current tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * a dequeue/enqueue event is a NOP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) if (se != cfs_rq->curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) update_stats_wait_start(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) if (flags & ENQUEUE_WAKEUP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) update_stats_enqueue_sleeper(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) update_stats_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) if (!schedstat_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * Mark the end of the wait period if dequeueing a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) * waiting task:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) if (se != cfs_rq->curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) update_stats_wait_end(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if ((flags & DEQUEUE_SLEEP) && entity_is_task(se)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) struct task_struct *tsk = task_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) if (tsk->state & TASK_INTERRUPTIBLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) __schedstat_set(se->statistics.sleep_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) rq_clock(rq_of(cfs_rq)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) if (tsk->state & TASK_UNINTERRUPTIBLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) __schedstat_set(se->statistics.block_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) rq_clock(rq_of(cfs_rq)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) * We are picking a new current task - update its stats:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) update_stats_curr_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) * We are starting a new run period:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) se->exec_start = rq_clock_task(rq_of(cfs_rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) /**************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) * Scheduling class queueing methods:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) * Approximate time to scan a whole NUMA task's address space, in ms. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) * task scan period is calculated based on the task's virtual memory size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) * and numa_balancing_scan_size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) unsigned int sysctl_numa_balancing_scan_period_min = 1000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) unsigned int sysctl_numa_balancing_scan_period_max = 60000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) /* Portion of address space to scan in MB */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) unsigned int sysctl_numa_balancing_scan_size = 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) /* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) unsigned int sysctl_numa_balancing_scan_delay = 1000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) struct numa_group {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) refcount_t refcount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) spinlock_t lock; /* nr_tasks, tasks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) int nr_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) pid_t gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) int active_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) struct rcu_head rcu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) unsigned long total_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) unsigned long max_faults_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) * Faults_cpu is used to decide whether memory should move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * towards the CPU. As a consequence, these stats are weighted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) * more by CPU use than by memory faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) unsigned long *faults_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) unsigned long faults[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) * For functions that can be called in multiple contexts that permit reading
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) * ->numa_group (see struct task_struct for locking rules).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) static struct numa_group *deref_task_numa_group(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) return rcu_dereference_check(p->numa_group, p == current ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) static struct numa_group *deref_curr_numa_group(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) return rcu_dereference_protected(p->numa_group, p == current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) static inline unsigned long group_faults_priv(struct numa_group *ng);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) static inline unsigned long group_faults_shared(struct numa_group *ng);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) static unsigned int task_nr_scan_windows(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) unsigned long rss = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) unsigned long nr_scan_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) * Calculations are based on RSS, since non-present and empty pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) * skipped by the PTE scanner and NUMA hinting faults should be trapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) * based on resident pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) nr_scan_pages = sysctl_numa_balancing_scan_size << (20 - PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) rss = get_mm_rss(p->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) if (!rss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) rss = nr_scan_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) rss = round_up(rss, nr_scan_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) return rss / nr_scan_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
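/*
 * For illustration, with the default scan_size of 256MB and 4KB pages,
 * nr_scan_pages is 65536; a task with 1GiB of resident memory therefore
 * needs 4 scan windows to cover its address space.
 */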
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) /* For sanity's sake, never scan more PTEs than MAX_SCAN_WINDOW MB/sec. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) #define MAX_SCAN_WINDOW 2560
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) static unsigned int task_scan_min(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) unsigned int scan_size = READ_ONCE(sysctl_numa_balancing_scan_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) unsigned int scan, floor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) unsigned int windows = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) if (scan_size < MAX_SCAN_WINDOW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) windows = MAX_SCAN_WINDOW / scan_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) floor = 1000 / windows;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) scan = sysctl_numa_balancing_scan_period_min / task_nr_scan_windows(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) return max_t(unsigned int, floor, scan);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) }
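/*
 * For illustration, with the defaults (scan_size = 256MB, MAX_SCAN_WINDOW =
 * 2560) at most 10 windows may be scanned per second, giving a floor of
 * 100ms per window. The 1GiB task above needs 4 windows, so
 * scan = 1000ms / 4 = 250ms and task_scan_min() returns max(100, 250) = 250ms.
 */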
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) static unsigned int task_scan_start(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) unsigned long smin = task_scan_min(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) unsigned long period = smin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) struct numa_group *ng;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) /* Scale the maximum scan period with the amount of shared memory. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) ng = rcu_dereference(p->numa_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) if (ng) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) unsigned long shared = group_faults_shared(ng);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) unsigned long private = group_faults_priv(ng);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) period *= refcount_read(&ng->refcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) period *= shared + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) period /= private + shared + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) return max(smin, period);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
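/*
 * For illustration: a numa_group referenced by 4 tasks with shared = 300 and
 * private = 100 group faults scales the period to
 * smin * 4 * (300 + 1) / (100 + 300 + 1), roughly 3 * smin, so the scan rate
 * drops as sharing and group size grow.
 */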
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) static unsigned int task_scan_max(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) unsigned long smin = task_scan_min(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) unsigned long smax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) struct numa_group *ng;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) /* Watch for max being lower than min due to floor calculations */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) /* Scale the maximum scan period with the amount of shared memory. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) ng = deref_curr_numa_group(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) if (ng) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) unsigned long shared = group_faults_shared(ng);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) unsigned long private = group_faults_priv(ng);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) unsigned long period = smax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) period *= refcount_read(&ng->refcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) period *= shared + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) period /= private + shared + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) smax = max(smax, period);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) return max(smin, smax);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) static void account_numa_enqueue(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) rq->nr_numa_running += (p->numa_preferred_nid != NUMA_NO_NODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) rq->nr_preferred_running += (p->numa_preferred_nid == task_node(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) static void account_numa_dequeue(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) rq->nr_numa_running -= (p->numa_preferred_nid != NUMA_NO_NODE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) rq->nr_preferred_running -= (p->numa_preferred_nid == task_node(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) /* Shared or private faults. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) #define NR_NUMA_HINT_FAULT_TYPES 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) /* Memory and CPU locality */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) #define NR_NUMA_HINT_FAULT_STATS (NR_NUMA_HINT_FAULT_TYPES * 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) /* Averaged statistics, and temporary buffers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) #define NR_NUMA_HINT_FAULT_BUCKETS (NR_NUMA_HINT_FAULT_STATS * 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) pid_t task_numa_group_id(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) struct numa_group *ng;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) pid_t gid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) ng = rcu_dereference(p->numa_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) gid = ng->gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) return gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) * The averaged statistics, shared & private, memory & CPU,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) * occupy the first half of the array. The second half of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) * array is for current counters, which are averaged into the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) * first set by task_numa_placement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) static inline int task_faults_idx(enum numa_faults_stats s, int nid, int priv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) return NR_NUMA_HINT_FAULT_TYPES * (s * nr_node_ids + nid) + priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) }
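/*
 * For illustration, with nr_node_ids == 2 the NUMA_MEM part of the faults[]
 * array is laid out as:
 *
 *   idx 0: node 0, shared     idx 1: node 0, private
 *   idx 2: node 1, shared     idx 3: node 1, private
 *
 * (priv == 1 selects private faults, matching group_faults_priv() below.)
 */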
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) static inline unsigned long task_faults(struct task_struct *p, int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) if (!p->numa_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) return p->numa_faults[task_faults_idx(NUMA_MEM, nid, 0)] +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) p->numa_faults[task_faults_idx(NUMA_MEM, nid, 1)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) static inline unsigned long group_faults(struct task_struct *p, int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) struct numa_group *ng = deref_task_numa_group(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) if (!ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) return group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 0)] +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) group->faults_cpu[task_faults_idx(NUMA_MEM, nid, 1)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) static inline unsigned long group_faults_priv(struct numa_group *ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) unsigned long faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) for_each_online_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) faults += ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) return faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) static inline unsigned long group_faults_shared(struct numa_group *ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) unsigned long faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) for_each_online_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) faults += ng->faults[task_faults_idx(NUMA_MEM, node, 0)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) return faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) * A node triggering more than 1/3 as many NUMA faults as the maximum is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) * considered part of a numa group's pseudo-interleaving set. Migrations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) * between these nodes are slowed down, to allow things to settle down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) #define ACTIVE_NODE_FRACTION 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) static bool numa_is_active_node(int nid, struct numa_group *ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) return group_faults_cpu(ng, nid) * ACTIVE_NODE_FRACTION > ng->max_faults_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) /* Handle placement on systems where not all nodes are directly connected. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) static unsigned long score_nearby_nodes(struct task_struct *p, int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) int maxdist, bool task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) unsigned long score = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) * All nodes are directly connected, and the same distance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * from each other. No need for fancy placement algorithms.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) if (sched_numa_topology_type == NUMA_DIRECT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) * This code is called for each node, introducing N^2 complexity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) * which should be ok given the number of nodes rarely exceeds 8.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) for_each_online_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) unsigned long faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) int dist = node_distance(nid, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) * The furthest away nodes in the system are not interesting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) * for placement; nid was already counted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) if (dist == sched_max_numa_distance || node == nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) * On systems with a backplane NUMA topology, compare groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) * of nodes, and move tasks towards the group with the most
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) * memory accesses. When comparing two nodes at distance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) * "hoplimit", only nodes closer by than "hoplimit" are part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) * of each group. Skip other nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) if (sched_numa_topology_type == NUMA_BACKPLANE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) dist >= maxdist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) /* Add up the faults from nearby nodes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) if (task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) faults = task_faults(p, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) faults = group_faults(p, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) * On systems with a glueless mesh NUMA topology, there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) * no fixed "groups of nodes". Instead, nodes that are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) * directly connected bounce traffic through intermediate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) * nodes; a numa_group can occupy any set of nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) * The further away a node is, the less the faults count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) * This seems to result in good task placement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) if (sched_numa_topology_type == NUMA_GLUELESS_MESH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) faults *= (sched_max_numa_distance - dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) faults /= (sched_max_numa_distance - LOCAL_DISTANCE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) score += faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) return score;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) }
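/*
 * For illustration of the glueless-mesh scaling above: with LOCAL_DISTANCE
 * of 10 and a sched_max_numa_distance of 40, a node at distance 20 from nid
 * contributes 2/3 of its faults to the score ((40 - 20) / (40 - 10)), while
 * a node at distance 30 contributes only 1/3.
 */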
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) * These return the fraction of accesses done by a particular task, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * task group, on a particular numa node. The group weight is given a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) * larger multiplier, in order to group tasks together that are almost
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * evenly spread out between numa nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) static inline unsigned long task_weight(struct task_struct *p, int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) int dist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) unsigned long faults, total_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) if (!p->numa_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) total_faults = p->total_numa_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) if (!total_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) faults = task_faults(p, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) faults += score_nearby_nodes(p, nid, dist, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) return 1000 * faults / total_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) static inline unsigned long group_weight(struct task_struct *p, int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) int dist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) struct numa_group *ng = deref_task_numa_group(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) unsigned long faults, total_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) if (!ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) total_faults = ng->total_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) if (!total_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) faults = group_faults(p, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) faults += score_nearby_nodes(p, nid, dist, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) return 1000 * faults / total_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) int src_nid, int dst_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) struct numa_group *ng = deref_curr_numa_group(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) int dst_nid = cpu_to_node(dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) int last_cpupid, this_cpupid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) this_cpupid = cpu_pid_to_cpupid(dst_cpu, current->pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) * Allow first faults or private faults to migrate immediately early in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) * the lifetime of a task. The magic number 4 is based on waiting for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) * two full passes of the "multi-stage node selection" test that is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) * executed below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) if ((p->numa_preferred_nid == NUMA_NO_NODE || p->numa_scan_seq <= 4) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) (cpupid_pid_unset(last_cpupid) || cpupid_match_pid(p, last_cpupid)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * Multi-stage node selection is used in conjunction with a periodic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) * migration fault to build a temporal task<->page relation. By using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) * a two-stage filter we remove short/unlikely relations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) * Using P(p) ~ n_p / n_t as per frequentist probability, we can equate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) * a task's usage of a particular page (n_p), relative to the total usage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) * of this page (n_t) in a given time-span, to a probability.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) * Our periodic faults will sample this probability and getting the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) * same result twice in a row, given these samples are fully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) * independent, is then given by P(p)^2, provided our sample period
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) * is sufficiently short compared to the usage pattern.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) * This quadratic squishes small probabilities, making it less likely we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) * act on an unlikely task<->page relation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) */
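	/*
	 * Numerically: a task responsible for 30% of the accesses to a page
	 * has only a ~9% chance of producing two consecutive faults from the
	 * same node, so such weak relations rarely pass this filter.
	 */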
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) if (!cpupid_pid_unset(last_cpupid) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) cpupid_to_nid(last_cpupid) != dst_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) /* Always allow migrate on private faults */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) if (cpupid_match_pid(p, last_cpupid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) /* A shared fault, but p->numa_group has not been set up yet. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) if (!ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) * Destination node is much more heavily used than the source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) * node? Allow migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) if (group_faults_cpu(ng, dst_nid) > group_faults_cpu(ng, src_nid) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) ACTIVE_NODE_FRACTION)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) * Distribute memory according to CPU & memory use on each node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) * with 3/4 hysteresis to avoid unnecessary memory migrations:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) *	faults_cpu(dst)   3   faults_cpu(src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) *	--------------- * - > ---------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) *	faults_mem(dst)   4   faults_mem(src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) return group_faults_cpu(ng, dst_nid) * group_faults(p, src_nid) * 3 >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) group_faults_cpu(ng, src_nid) * group_faults(p, dst_nid) * 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) }
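/*
 * For illustration of the 3/4 hysteresis above: if the destination node has
 * faults_cpu = 40 and faults_mem = 20 (ratio 2.0) while the source has
 * faults_cpu = 30 and faults_mem = 20 (ratio 1.5), the comparison reads
 * 40 * 20 * 3 = 2400 > 30 * 20 * 4 = 2400 and is false; the destination
 * ratio must exceed the source ratio by more than a third before memory
 * follows the CPU.
 */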
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) * 'numa_type' describes the node at the moment of load balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) enum numa_type {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) /* The node has spare capacity that can be used to run more tasks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) node_has_spare = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) * The node is fully used and the tasks don't compete for more CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) * cycles. Nevertheless, some tasks might wait before running.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) node_fully_busy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) * The node is overloaded and can't provide expected CPU cycles to all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) * tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) node_overloaded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) /* Cached statistics for all CPUs within a node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) struct numa_stats {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) unsigned long load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) unsigned long runnable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) unsigned long util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) /* Total compute capacity of CPUs on a node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) unsigned long compute_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) unsigned int nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) unsigned int weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) enum numa_type node_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) int idle_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) static inline bool is_core_idle(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) #ifdef CONFIG_SCHED_SMT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) int sibling;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) for_each_cpu(sibling, cpu_smt_mask(cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) if (cpu == sibling)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) if (!idle_cpu(sibling))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) struct task_numa_env {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) int src_cpu, src_nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) int dst_cpu, dst_nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) struct numa_stats src_stats, dst_stats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) int imbalance_pct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) int dist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) struct task_struct *best_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) long best_imp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) int best_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) static unsigned long cpu_load(struct rq *rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) static unsigned long cpu_runnable(struct rq *rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) static unsigned long cpu_util(int cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) static inline long adjust_numa_imbalance(int imbalance, int nr_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) static inline enum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) numa_type numa_classify(unsigned int imbalance_pct,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) struct numa_stats *ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) if ((ns->nr_running > ns->weight) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) (((ns->compute_capacity * 100) < (ns->util * imbalance_pct)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) ((ns->compute_capacity * imbalance_pct) < (ns->runnable * 100))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) return node_overloaded;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) if ((ns->nr_running < ns->weight) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) (((ns->compute_capacity * 100) > (ns->util * imbalance_pct)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) ((ns->compute_capacity * imbalance_pct) > (ns->runnable * 100))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) return node_has_spare;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) return node_fully_busy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) }
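/*
 * For illustration (taking an imbalance_pct of 115 purely as an example
 * value): a node with 4 CPUs (weight = 4), 5 runnable tasks,
 * compute_capacity = 4096 and util = 3700 is node_overloaded, since
 * nr_running > weight and 4096 * 100 < 3700 * 115. The same node with only
 * 3 runnable tasks is node_has_spare, because nr_running < weight alone
 * satisfies the second check.
 */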
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) #ifdef CONFIG_SCHED_SMT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) /* Forward declarations of select_idle_sibling helpers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) static inline bool test_idle_cores(int cpu, bool def);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) static inline int numa_idle_core(int idle_core, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) if (!static_branch_likely(&sched_smt_present) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) idle_core >= 0 || !test_idle_cores(cpu, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) return idle_core;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) * Prefer cores instead of packing HT siblings
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) * and triggering future load balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) if (is_core_idle(cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) idle_core = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) return idle_core;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) static inline int numa_idle_core(int idle_core, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) return idle_core;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) * Gather all necessary information to make NUMA balancing placement
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) * decisions that are compatible with the standard load balancer. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) * borrows code and logic from update_sg_lb_stats but sharing a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) * common implementation is impractical.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) static void update_numa_stats(struct task_numa_env *env,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) struct numa_stats *ns, int nid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) bool find_idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) int cpu, idle_core = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) memset(ns, 0, sizeof(*ns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) ns->idle_cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) for_each_cpu(cpu, cpumask_of_node(nid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) struct rq *rq = cpu_rq(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) ns->load += cpu_load(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) ns->runnable += cpu_runnable(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) ns->util += cpu_util(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) ns->nr_running += rq->cfs.h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) ns->compute_capacity += capacity_of(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) if (find_idle && !rq->nr_running && idle_cpu(cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) if (READ_ONCE(rq->numa_migrate_on) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) !cpumask_test_cpu(cpu, env->p->cpus_ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) if (ns->idle_cpu == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) ns->idle_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) idle_core = numa_idle_core(idle_core, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) ns->weight = cpumask_weight(cpumask_of_node(nid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) ns->node_type = numa_classify(env->imbalance_pct, ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) if (idle_core >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) ns->idle_cpu = idle_core;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) }
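
/*
 * Editorial note on the idle-CPU caching above: if, for example, CPU 2 is
 * idle but shares a core with a busy SMT sibling, while CPUs 4 and 5 form
 * a fully idle core, ns->idle_cpu is first cached as 2 and then overridden
 * with the idle core found by numa_idle_core(), so later placement prefers
 * a whole idle core over a half-busy one.
 */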
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) static void task_numa_assign(struct task_numa_env *env,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) struct task_struct *p, long imp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) struct rq *rq = cpu_rq(env->dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) /* Check if the run-queue is part of an active NUMA balance. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) if (env->best_cpu != env->dst_cpu && xchg(&rq->numa_migrate_on, 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) int start = env->dst_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) /* Find alternative idle CPU. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) for_each_cpu_wrap(cpu, cpumask_of_node(env->dst_nid), start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) if (cpu == env->best_cpu || !idle_cpu(cpu) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) !cpumask_test_cpu(cpu, env->p->cpus_ptr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) env->dst_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) rq = cpu_rq(env->dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) if (!xchg(&rq->numa_migrate_on, 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) goto assign;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) /* Failed to find an alternative idle CPU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) assign:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) * Clear the previous best_cpu/rq numa-migrate flag, since the task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) * has now found a better CPU to move/swap to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) if (env->best_cpu != -1 && env->best_cpu != env->dst_cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) rq = cpu_rq(env->best_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) WRITE_ONCE(rq->numa_migrate_on, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) if (env->best_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) put_task_struct(env->best_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) if (p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) get_task_struct(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) env->best_task = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) env->best_imp = imp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) env->best_cpu = env->dst_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) }
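
/*
 * Editorial summary of the claiming protocol above: a destination
 * run-queue is claimed with xchg(&rq->numa_migrate_on, 1) so that
 * concurrent NUMA-balancing attempts do not target the same CPU; when the
 * claim fails, another idle CPU on the node is tried, and any previously
 * claimed best_cpu is released once a better destination is recorded.
 */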
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) static bool load_too_imbalanced(long src_load, long dst_load,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) struct task_numa_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) long imb, old_imb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) long orig_src_load, orig_dst_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) long src_capacity, dst_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) * The load is corrected for the CPU capacity available on each node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) * src_load dst_load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) * ------------ vs ---------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) * src_capacity dst_capacity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) src_capacity = env->src_stats.compute_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) dst_capacity = env->dst_stats.compute_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) imb = abs(dst_load * src_capacity - src_load * dst_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) orig_src_load = env->src_stats.load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) orig_dst_load = env->dst_stats.load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) old_imb = abs(orig_dst_load * src_capacity - orig_src_load * dst_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) /* Would this change make things worse? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) return (imb > old_imb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) }
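
/*
 * Editorial worked example of the cross-multiplied comparison above
 * (illustrative numbers): with src_capacity = dst_capacity = 1024,
 * orig_src_load = 1000 and orig_dst_load = 400 give
 * old_imb = |400 - 1000| * 1024 = 614400. A move that results in
 * src_load = 700 and dst_load = 700 gives imb = 0, which is not worse,
 * while one that results in src_load = 100 and dst_load = 1300 gives
 * imb = 1200 * 1024 = 1228800 > old_imb and is rejected as too imbalanced.
 */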
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) * Maximum NUMA importance can be 1998 (2*999);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) * SMALLIMP @ 30 would be close to 1998/64.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) * Used to deter task migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) #define SMALLIMP 30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) * This checks whether the overall compute and NUMA accesses of the system
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) * would be improved if the source task was migrated to the target dst_cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) * taking into account that it might be best if the task running on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) * dst_cpu were exchanged with the source task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) static bool task_numa_compare(struct task_numa_env *env,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) long taskimp, long groupimp, bool maymove)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) struct rq *dst_rq = cpu_rq(env->dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) long imp = p_ng ? groupimp : taskimp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) struct task_struct *cur;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) long src_load, dst_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) int dist = env->dist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) long moveimp = imp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) long load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) bool stopsearch = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) if (READ_ONCE(dst_rq->numa_migrate_on))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) cur = rcu_dereference(dst_rq->curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) cur = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) * Because we have preemption enabled we can get migrated around and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * end up trying to select ourselves (current == env->p) as a swap candidate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) if (cur == env->p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) stopsearch = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) if (!cur) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) if (maymove && moveimp >= env->best_imp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) goto assign;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) /* Skip this swap candidate if it cannot move to the source cpu. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) if (!cpumask_test_cpu(env->src_cpu, cur->cpus_ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) * Skip this swap candidate if it is not moving to its preferred
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) * node and the best task is.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) if (env->best_task &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) env->best_task->numa_preferred_nid == env->src_nid &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) cur->numa_preferred_nid != env->src_nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) * "imp" is the fault differential for the source task between the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) * source and destination node. Calculate the total differential for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) * the source task and potential destination task. The more negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) * the value is, the more remote accesses that would be expected to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) * be incurred if the tasks were swapped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) * If dst and source tasks are in the same NUMA group, or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) * in any group then look only at task weights.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) cur_ng = rcu_dereference(cur->numa_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) if (cur_ng == p_ng) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) imp = taskimp + task_weight(cur, env->src_nid, dist) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) task_weight(cur, env->dst_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) * Add some hysteresis to prevent swapping the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) * tasks within a group over tiny differences.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) if (cur_ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) imp -= imp / 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * Compare the group weights. If a task is all by itself
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * (not part of a group), use the task weight instead.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) if (cur_ng && p_ng)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) imp += group_weight(cur, env->src_nid, dist) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) group_weight(cur, env->dst_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) imp += task_weight(cur, env->src_nid, dist) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) task_weight(cur, env->dst_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) /* Discourage picking a task already on its preferred node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) if (cur->numa_preferred_nid == env->dst_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) imp -= imp / 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) * Encourage picking a task that moves to its preferred node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) * This potentially makes imp larger than its maximum of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) * 1998 (see SMALLIMP and task_weight for why) but in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) * case, it does not matter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) if (cur->numa_preferred_nid == env->src_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) imp += imp / 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) if (maymove && moveimp > imp && moveimp > env->best_imp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) imp = moveimp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) cur = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) goto assign;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) * Prefer swapping with a task moving to its preferred node over a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) * task that is not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) if (env->best_task && cur->numa_preferred_nid == env->src_nid &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) env->best_task->numa_preferred_nid != env->src_nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) goto assign;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) * If the NUMA importance is less than SMALLIMP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) * task migration might only result in ping pong
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) * of tasks and also hurt performance due to cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) * misses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) if (imp < SMALLIMP || imp <= env->best_imp + SMALLIMP / 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) * In the overloaded case, try and keep the load balanced.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) load = task_h_load(env->p) - task_h_load(cur);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) if (!load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) goto assign;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) dst_load = env->dst_stats.load + load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) src_load = env->src_stats.load - load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) if (load_too_imbalanced(src_load, dst_load, env))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) assign:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) /* Evaluate an idle CPU for a task numa move. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) if (!cur) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) int cpu = env->dst_stats.idle_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) /* Nothing was cached, so the current dst CPU went idle since the search. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) if (cpu < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) cpu = env->dst_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) * If the CPU is no longer truly idle and the previous best CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) * is, keep using it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) if (!idle_cpu(cpu) && env->best_cpu >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) idle_cpu(env->best_cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) cpu = env->best_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) env->dst_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) task_numa_assign(env, cur, imp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) * If a move to idle is allowed because there is spare capacity or the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) * load balance improves, then stop the search. While a better swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) * candidate may exist, a search is not free.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) if (maymove && !cur && env->best_cpu >= 0 && idle_cpu(env->best_cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) stopsearch = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) * If a swap candidate must be identified and the current best task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) * moves to its preferred node then stop the search.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) if (!maymove && env->best_task &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) env->best_task->numa_preferred_nid == env->src_nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) stopsearch = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) return stopsearch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) static void task_numa_find_cpu(struct task_numa_env *env,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) long taskimp, long groupimp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) bool maymove = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) * If dst node has spare capacity, then check if there is an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) * imbalance that would be overruled by the load balancer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) if (env->dst_stats.node_type == node_has_spare) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) unsigned int imbalance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) int src_running, dst_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) * Would movement cause an imbalance? Note that if src has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) * more running tasks then the imbalance is ignored, as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) * move improves the imbalance from the perspective of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) * CPU load balancer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) src_running = env->src_stats.nr_running - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) dst_running = env->dst_stats.nr_running + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) imbalance = max(0, dst_running - src_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) imbalance = adjust_numa_imbalance(imbalance, dst_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) /* Use idle CPU if there is no imbalance */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) if (!imbalance) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) maymove = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) if (env->dst_stats.idle_cpu >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) env->dst_cpu = env->dst_stats.idle_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) task_numa_assign(env, NULL, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) long src_load, dst_load, load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) * If the improvement from just moving env->p is better
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) * than swapping tasks around, check if a move is possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) load = task_h_load(env->p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) dst_load = env->dst_stats.load + load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) src_load = env->src_stats.load - load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) maymove = !load_too_imbalanced(src_load, dst_load, env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) for_each_cpu(cpu, cpumask_of_node(env->dst_nid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) /* Skip this CPU if the source task cannot migrate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) if (!cpumask_test_cpu(cpu, env->p->cpus_ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) env->dst_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) if (task_numa_compare(env, taskimp, groupimp, maymove))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) static int task_numa_migrate(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) struct task_numa_env env = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) .p = p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) .src_cpu = task_cpu(p),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) .src_nid = task_node(p),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) .imbalance_pct = 112,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) .best_task = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) .best_imp = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) .best_cpu = -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) unsigned long taskweight, groupweight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) long taskimp, groupimp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) struct numa_group *ng;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) struct rq *best_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) int nid, ret, dist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) * Pick the lowest SD_NUMA domain, as that would have the smallest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) * imbalance and would be the first to start moving tasks about.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) * And we want to avoid any moving of tasks about, as that would create
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) * random movement of tasks -- countering the numa conditions we're trying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) * to satisfy here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) sd = rcu_dereference(per_cpu(sd_numa, env.src_cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) if (sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) env.imbalance_pct = 100 + (sd->imbalance_pct - 100) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) * Cpusets can break the scheduler domain tree into smaller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) * balance domains, some of which do not cross NUMA boundaries.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) * Tasks that are "trapped" in such domains cannot be migrated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) * elsewhere, so there is no point in (re)trying.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) if (unlikely(!sd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) sched_setnuma(p, task_node(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) env.dst_nid = p->numa_preferred_nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) dist = env.dist = node_distance(env.src_nid, env.dst_nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) taskweight = task_weight(p, env.src_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) groupweight = group_weight(p, env.src_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) update_numa_stats(&env, &env.src_stats, env.src_nid, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) taskimp = task_weight(p, env.dst_nid, dist) - taskweight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) groupimp = group_weight(p, env.dst_nid, dist) - groupweight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) update_numa_stats(&env, &env.dst_stats, env.dst_nid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) /* Try to find a spot on the preferred nid. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) task_numa_find_cpu(&env, taskimp, groupimp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) * Look at other nodes in these cases:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) * - there is no space available on the preferred_nid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) * - the task is part of a numa_group that is interleaved across
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) * multiple NUMA nodes; in order to better consolidate the group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) * we need to check other locations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) ng = deref_curr_numa_group(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) for_each_online_node(nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) if (nid == env.src_nid || nid == p->numa_preferred_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) dist = node_distance(env.src_nid, env.dst_nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) if (sched_numa_topology_type == NUMA_BACKPLANE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) dist != env.dist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) taskweight = task_weight(p, env.src_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) groupweight = group_weight(p, env.src_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) /* Only consider nodes where both task and groups benefit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) taskimp = task_weight(p, nid, dist) - taskweight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) groupimp = group_weight(p, nid, dist) - groupweight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) if (taskimp < 0 && groupimp < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) env.dist = dist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) env.dst_nid = nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) update_numa_stats(&env, &env.dst_stats, env.dst_nid, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) task_numa_find_cpu(&env, taskimp, groupimp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) * If the task is part of a workload that spans multiple NUMA nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) * and is migrating into one of the workload's active nodes, remember
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) * this node as the task's preferred numa node, so the workload can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) * settle down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * A task that migrated to a second choice node will be better off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) * trying for a better one later. Do not set the preferred node here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) if (ng) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) if (env.best_cpu == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) nid = env.src_nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) nid = cpu_to_node(env.best_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) if (nid != p->numa_preferred_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) sched_setnuma(p, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) /* No better CPU than the current one was found. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) if (env.best_cpu == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) trace_sched_stick_numa(p, env.src_cpu, NULL, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) return -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) best_rq = cpu_rq(env.best_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) if (env.best_task == NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) ret = migrate_task_to(p, env.best_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) WRITE_ONCE(best_rq->numa_migrate_on, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) if (ret != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) trace_sched_stick_numa(p, env.src_cpu, NULL, env.best_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) WRITE_ONCE(best_rq->numa_migrate_on, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) if (ret != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) trace_sched_stick_numa(p, env.src_cpu, env.best_task, env.best_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) put_task_struct(env.best_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) /* Attempt to migrate a task to a CPU on the preferred node. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) static void numa_migrate_preferred(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) unsigned long interval = HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) /* This task has no NUMA fault statistics yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) if (unlikely(p->numa_preferred_nid == NUMA_NO_NODE || !p->numa_faults))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) /* Periodically retry migrating the task to the preferred node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) p->numa_migrate_retry = jiffies + interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) /* Success if task is already running on preferred CPU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) if (task_node(p) == p->numa_preferred_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) /* Otherwise, try migrate to a CPU on the preferred node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) task_numa_migrate(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) }
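
/*
 * Editorial timing example (assuming HZ = 1000, purely illustrative):
 * with numa_scan_period = 1000ms, msecs_to_jiffies(1000) / 16 = 62
 * jiffies, so the min() above arms numa_migrate_retry roughly 62ms in
 * the future rather than a full second.
 */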
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) * Find out how many nodes the workload is actively running on. Do this by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) * tracking the nodes from which NUMA hinting faults are triggered. This can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) * be different from the set of nodes where the workload's memory is currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) * located.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) static void numa_group_count_active_nodes(struct numa_group *numa_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) unsigned long faults, max_faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) int nid, active_nodes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) for_each_online_node(nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) faults = group_faults_cpu(numa_group, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) if (faults > max_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) max_faults = faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) for_each_online_node(nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) faults = group_faults_cpu(numa_group, nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) if (faults * ACTIVE_NODE_FRACTION > max_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) active_nodes++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) numa_group->max_faults_cpu = max_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) numa_group->active_nodes = active_nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) }
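
/*
 * Editorial worked example (ACTIVE_NODE_FRACTION is defined elsewhere in
 * this file; 3 is assumed here purely for illustration): with per-node
 * group fault counts of 1200, 500, 300 and 90, max_faults = 1200 and a
 * node counts as active when faults * 3 > 1200, i.e. faults > 400, giving
 * active_nodes = 2.
 */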
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) * When adapting the scan rate, the period is divided into NUMA_PERIOD_SLOTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) * increments. The more local the fault statistics are, the higher the scan
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) * period will be for the next scan window. If the local/(local+remote) ratio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) * is below NUMA_PERIOD_THRESHOLD (where the range of the ratio is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) * 1..NUMA_PERIOD_SLOTS), the scan period will decrease. Aim for 70% local accesses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) #define NUMA_PERIOD_SLOTS 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) #define NUMA_PERIOD_THRESHOLD 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) * Increase the scan period (slow down scanning) if the majority of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) * our memory is already on our local node, or if the majority of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) * the page accesses are shared with other processes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) * Otherwise, decrease the scan period.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) static void update_task_scan_period(struct task_struct *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) unsigned long shared, unsigned long private)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) unsigned int period_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) int lr_ratio, ps_ratio;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) int diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) unsigned long remote = p->numa_faults_locality[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) unsigned long local = p->numa_faults_locality[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) * If there were no recorded hinting faults then either the task is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) * completely idle or all activity is in areas that are not of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) * to automatic numa balancing. Related to that, if there were failed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) * migrations then it implies we are migrating too quickly or the local
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) * node is overloaded. In either case, scan slower.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) if (local + shared == 0 || p->numa_faults_locality[2]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) p->numa_scan_period = min(p->numa_scan_period_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) p->numa_scan_period << 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) p->mm->numa_next_scan = jiffies +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) msecs_to_jiffies(p->numa_scan_period);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) * Prepare to scale scan period relative to the current period.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) * == NUMA_PERIOD_THRESHOLD scan period stays the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) * < NUMA_PERIOD_THRESHOLD scan period decreases (scan faster)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) * >= NUMA_PERIOD_THRESHOLD scan period increases (scan slower)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) period_slot = DIV_ROUND_UP(p->numa_scan_period, NUMA_PERIOD_SLOTS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) lr_ratio = (local * NUMA_PERIOD_SLOTS) / (local + remote);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) ps_ratio = (private * NUMA_PERIOD_SLOTS) / (private + shared);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) if (ps_ratio >= NUMA_PERIOD_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) * Most memory accesses are local. There is no need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * do fast NUMA scanning, since memory is already local.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) int slot = ps_ratio - NUMA_PERIOD_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) if (!slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) slot = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) diff = slot * period_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) } else if (lr_ratio >= NUMA_PERIOD_THRESHOLD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) * Most memory accesses are shared with other tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) * There is no point in continuing fast NUMA scanning,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) * since other tasks may just move the memory elsewhere.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) int slot = lr_ratio - NUMA_PERIOD_THRESHOLD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) if (!slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) slot = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) diff = slot * period_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) * Private memory faults exceed (SLOTS-THRESHOLD)/SLOTS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) * yet they are not on the local NUMA node. Speed up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) * NUMA scanning to get the memory moved over.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) int ratio = max(lr_ratio, ps_ratio);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) diff = -(NUMA_PERIOD_THRESHOLD - ratio) * period_slot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) p->numa_scan_period = clamp(p->numa_scan_period + diff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) task_scan_min(p), task_scan_max(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) }
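
/*
 * Editorial worked example of the adjustment above (illustrative numbers):
 * with numa_scan_period = 1000 and NUMA_PERIOD_SLOTS = 10,
 * period_slot = DIV_ROUND_UP(1000, 10) = 100. If private = 900 and
 * shared = 100, then ps_ratio = 9 >= NUMA_PERIOD_THRESHOLD, slot = 2 and
 * diff = +200, so scanning slows down. If instead local = 300,
 * remote = 700, private = 400 and shared = 600, both ratios (3 and 4)
 * fall below the threshold and diff = -(7 - 4) * 100 = -300, so scanning
 * speeds up, subject to the clamp() between task_scan_min() and
 * task_scan_max().
 */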
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) * Get the fraction of time the task has been running since the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) * NUMA placement cycle. The scheduler keeps similar statistics, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) * decays those on a 32ms period, which is orders of magnitude off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) * from the dozens-of-seconds NUMA balancing period. Use the scheduler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) * stats only if the task is so new there are no NUMA statistics yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) static u64 numa_get_avg_runtime(struct task_struct *p, u64 *period)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) u64 runtime, delta, now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) /* Use the start of this time slice to avoid calculations. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) now = p->se.exec_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) runtime = p->se.sum_exec_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) if (p->last_task_numa_placement) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) delta = runtime - p->last_sum_exec_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) *period = now - p->last_task_numa_placement;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) /* Avoid time going backwards, prevent potential divide error: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) if (unlikely((s64)*period < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) *period = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) delta = p->se.avg.load_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) *period = LOAD_AVG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) p->last_sum_exec_runtime = runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) p->last_task_numa_placement = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) return delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) * Determine the preferred nid for a task in a numa_group. This needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) * be done in a way that produces consistent results with group_weight,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) * otherwise workloads might not converge.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) static int preferred_group_nid(struct task_struct *p, int nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) nodemask_t nodes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) int dist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) /* Direct connections between all NUMA nodes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) if (sched_numa_topology_type == NUMA_DIRECT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) return nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) * On a system with glueless mesh NUMA topology, group_weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) * scores nodes according to the number of NUMA hinting faults on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) * both the node itself, and on nearby nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) if (sched_numa_topology_type == NUMA_GLUELESS_MESH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) unsigned long score, max_score = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) int node, max_node = nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) dist = sched_max_numa_distance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) for_each_online_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) score = group_weight(p, node, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) if (score > max_score) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) max_score = score;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) max_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) return max_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) * Finding the preferred nid in a system with NUMA backplane
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) * interconnect topology is more involved. The goal is to locate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) * tasks from numa_groups near each other in the system, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) * untangle workloads from different sides of the system. This requires
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) * searching down the hierarchy of node groups, recursively searching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) * inside the highest scoring group of nodes. The nodemask tricks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) * keep the complexity of the search down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) nodes = node_online_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) for (dist = sched_max_numa_distance; dist > LOCAL_DISTANCE; dist--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) unsigned long max_faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) nodemask_t max_group = NODE_MASK_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) int a, b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) /* Are there nodes at this distance from each other? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) if (!find_numa_distance(dist))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) for_each_node_mask(a, nodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) unsigned long faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) nodemask_t this_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) nodes_clear(this_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) /* Sum group's NUMA faults; includes a==b case. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) for_each_node_mask(b, nodes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) if (node_distance(a, b) < dist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) faults += group_faults(p, b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) node_set(b, this_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) node_clear(b, nodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) /* Remember the top group. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) if (faults > max_faults) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) max_faults = faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) max_group = this_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) * subtle: at the smallest distance there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) * just one node left in each "group", the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) * winner is the preferred nid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) nid = a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) /* Next round, evaluate the nodes within max_group. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) if (!max_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) nodes = max_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) return nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) static void task_numa_placement(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) int seq, nid, max_nid = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) unsigned long max_faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) unsigned long fault_types[2] = { 0, 0 };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) unsigned long total_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) u64 runtime, period;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) spinlock_t *group_lock = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) struct numa_group *ng;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) * The p->mm->numa_scan_seq field gets updated without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) * exclusive access. Use READ_ONCE() here to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) * that the field is read in a single access:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) seq = READ_ONCE(p->mm->numa_scan_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) if (p->numa_scan_seq == seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) p->numa_scan_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) p->numa_scan_period_max = task_scan_max(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) total_faults = p->numa_faults_locality[0] +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) p->numa_faults_locality[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) runtime = numa_get_avg_runtime(p, &period);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) /* If the task is part of a group prevent parallel updates to group stats */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) ng = deref_curr_numa_group(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) if (ng) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) group_lock = &ng->lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) spin_lock_irq(group_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) /* Find the node with the highest number of faults */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) for_each_online_node(nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) /* Keep track of the offsets in numa_faults array */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) int mem_idx, membuf_idx, cpu_idx, cpubuf_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) unsigned long faults = 0, group_faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) int priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) for (priv = 0; priv < NR_NUMA_HINT_FAULT_TYPES; priv++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) long diff, f_diff, f_weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) mem_idx = task_faults_idx(NUMA_MEM, nid, priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) membuf_idx = task_faults_idx(NUMA_MEMBUF, nid, priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) cpu_idx = task_faults_idx(NUMA_CPU, nid, priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) cpubuf_idx = task_faults_idx(NUMA_CPUBUF, nid, priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) /* Decay existing window, copy faults since last scan */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) diff = p->numa_faults[membuf_idx] - p->numa_faults[mem_idx] / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) fault_types[priv] += p->numa_faults[membuf_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) p->numa_faults[membuf_idx] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) * Normalize the faults_from, so all tasks in a group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) * count according to CPU use, instead of by the raw
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) * number of faults. Tasks with little runtime have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) * little over-all impact on throughput, and thus their
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) * faults are less important.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) */
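/*
 * Worked example (editorial, made-up numbers): if the task ran for 3ms
 * of a 6ms sampling period, runtime << 16 / (period + 1) ~= 32768, i.e.
 * ~0.5 in 16.16 fixed point; each node's buffered CPU faults are then
 * scaled by that fraction of their share of total_faults before being
 * folded into the decayed window below.
 */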
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) f_weight = div64_u64(runtime << 16, period + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) f_weight = (f_weight * p->numa_faults[cpubuf_idx]) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) (total_faults + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) f_diff = f_weight - p->numa_faults[cpu_idx] / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) p->numa_faults[cpubuf_idx] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) p->numa_faults[mem_idx] += diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) p->numa_faults[cpu_idx] += f_diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) faults += p->numa_faults[mem_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) p->total_numa_faults += diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) if (ng) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) * safe because we can only change our own group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) * mem_idx represents the offset for a given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) * nid and priv in a specific region because it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) * is at the beginning of the numa_faults array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) ng->faults[mem_idx] += diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) ng->faults_cpu[mem_idx] += f_diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) ng->total_faults += diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) group_faults += ng->faults[mem_idx];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) if (!ng) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) if (faults > max_faults) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) max_faults = faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) max_nid = nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) } else if (group_faults > max_faults) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) max_faults = group_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) max_nid = nid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) if (ng) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) numa_group_count_active_nodes(ng);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) spin_unlock_irq(group_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) max_nid = preferred_group_nid(p, max_nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) if (max_faults) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) /* Set the new preferred node */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) if (max_nid != p->numa_preferred_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) sched_setnuma(p, max_nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) update_task_scan_period(p, fault_types[0], fault_types[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) static inline int get_numa_group(struct numa_group *grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) return refcount_inc_not_zero(&grp->refcount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) static inline void put_numa_group(struct numa_group *grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) if (refcount_dec_and_test(&grp->refcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) kfree_rcu(grp, rcu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) static void task_numa_group(struct task_struct *p, int cpupid, int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) int *priv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) struct numa_group *grp, *my_grp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) bool join = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) int cpu = cpupid_to_cpu(cpupid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) if (unlikely(!deref_curr_numa_group(p))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) unsigned int size = sizeof(struct numa_group) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 4*nr_node_ids*sizeof(unsigned long);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) grp = kzalloc(size, GFP_KERNEL | __GFP_NOWARN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) if (!grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) refcount_set(&grp->refcount, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) grp->active_nodes = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) grp->max_faults_cpu = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) spin_lock_init(&grp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) grp->gid = p->pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) /* Second half of the array tracks nids where faults happen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) grp->faults_cpu = grp->faults + NR_NUMA_HINT_FAULT_TYPES *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) nr_node_ids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) grp->faults[i] = p->numa_faults[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) grp->total_faults = p->total_numa_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) grp->nr_tasks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) rcu_assign_pointer(p->numa_group, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) tsk = READ_ONCE(cpu_rq(cpu)->curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) if (!cpupid_match_pid(tsk, cpupid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) goto no_join;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) grp = rcu_dereference(tsk->numa_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) if (!grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) goto no_join;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) my_grp = deref_curr_numa_group(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) if (grp == my_grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) goto no_join;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) * Only join the other group if it's bigger; if we're the bigger group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) * the other task will join us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) if (my_grp->nr_tasks > grp->nr_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) goto no_join;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) * Tie-break on the grp address.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) if (my_grp->nr_tasks == grp->nr_tasks && my_grp > grp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) goto no_join;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) /* Always join threads in the same process. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) if (tsk->mm == current->mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) join = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) /* Simple filter to avoid false positives due to PID collisions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) if (flags & TNF_SHARED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) join = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) /* Update priv based on whether false sharing was detected */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) *priv = !join;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) if (join && !get_numa_group(grp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) goto no_join;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) if (!join)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) BUG_ON(irqs_disabled());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) double_lock_irq(&my_grp->lock, &grp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) my_grp->faults[i] -= p->numa_faults[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) grp->faults[i] += p->numa_faults[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) my_grp->total_faults -= p->total_numa_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) grp->total_faults += p->total_numa_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) my_grp->nr_tasks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) grp->nr_tasks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) spin_unlock(&my_grp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) spin_unlock_irq(&grp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) rcu_assign_pointer(p->numa_group, grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) put_numa_group(my_grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) no_join:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) * Get rid of NUMA statistics associated with a task (either current or dead).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) * If @final is set, the task is dead and has reached refcount zero, so we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) * safely free all relevant data structures. Otherwise, there might be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) * concurrent reads from places like load balancing and procfs, and we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) * reset the data back to default state without freeing ->numa_faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) void task_numa_free(struct task_struct *p, bool final)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) /* safe: p either is current or is being freed by current */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) struct numa_group *grp = rcu_dereference_raw(p->numa_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) unsigned long *numa_faults = p->numa_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) if (!numa_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) if (grp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) spin_lock_irqsave(&grp->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) grp->faults[i] -= p->numa_faults[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) grp->total_faults -= p->total_numa_faults;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) grp->nr_tasks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) spin_unlock_irqrestore(&grp->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) RCU_INIT_POINTER(p->numa_group, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) put_numa_group(grp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) if (final) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) p->numa_faults = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) kfree(numa_faults);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) p->total_numa_faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) numa_faults[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) * Got a PROT_NONE fault for a page on @node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) void task_numa_fault(int last_cpupid, int mem_node, int pages, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) struct task_struct *p = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) bool migrated = flags & TNF_MIGRATED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) int cpu_node = task_node(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) int local = !!(flags & TNF_FAULT_LOCAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) struct numa_group *ng;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) int priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) if (!static_branch_likely(&sched_numa_balancing))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) /* for example, ksmd faulting in a user's mm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) if (!p->mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) /* Allocate buffer to track faults on a per-node basis */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) if (unlikely(!p->numa_faults)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) int size = sizeof(*p->numa_faults) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) NR_NUMA_HINT_FAULT_BUCKETS * nr_node_ids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) p->numa_faults = kzalloc(size, GFP_KERNEL|__GFP_NOWARN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) if (!p->numa_faults)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) p->total_numa_faults = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) memset(p->numa_faults_locality, 0, sizeof(p->numa_faults_locality));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) * First accesses are treated as private; otherwise, consider accesses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) * to be private if the accessing pid has not changed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) */
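/*
 * Editorial note: the (-1 & LAST_CPUPID_MASK) value below is assumed to
 * be the reset cpupid a page carries before any hinting fault has been
 * recorded for it, which is what makes this the "first access" case.
 */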
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) if (unlikely(last_cpupid == (-1 & LAST_CPUPID_MASK))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) priv = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) priv = cpupid_match_pid(p, last_cpupid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) if (!priv && !(flags & TNF_NO_GROUP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) task_numa_group(p, last_cpupid, flags, &priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) * If a workload spans multiple NUMA nodes, a shared fault that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) * occurs wholly within the set of nodes that the workload is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) * actively using should be counted as local. This allows the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) * scan rate to slow down when a workload has settled down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) ng = deref_curr_numa_group(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) if (!priv && !local && ng && ng->active_nodes > 1 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) numa_is_active_node(cpu_node, ng) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) numa_is_active_node(mem_node, ng))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) local = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) * Periodically retry migrating the task to its preferred node, in case a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) * previous attempt failed or the scheduler moved us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) if (time_after(jiffies, p->numa_migrate_retry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) task_numa_placement(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) numa_migrate_preferred(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) if (migrated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) p->numa_pages_migrated += pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) if (flags & TNF_MIGRATE_FAIL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) p->numa_faults_locality[2] += pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) p->numa_faults[task_faults_idx(NUMA_MEMBUF, mem_node, priv)] += pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) p->numa_faults[task_faults_idx(NUMA_CPUBUF, cpu_node, priv)] += pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) p->numa_faults_locality[local] += pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) static void reset_ptenuma_scan(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) * We only did a read acquisition of the mmap sem, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) * p->mm->numa_scan_seq is written to without exclusive access
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) * and the update is not guaranteed to be atomic. That's not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) * much of an issue though, since this is just used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) * statistical sampling. Use READ_ONCE/WRITE_ONCE, which are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) * expensive, to avoid any form of compiler optimizations:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) WRITE_ONCE(p->mm->numa_scan_seq, READ_ONCE(p->mm->numa_scan_seq) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) p->mm->numa_scan_offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) * The expensive part of numa migration is done from task_work context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) * Triggered from task_tick_numa().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) static void task_numa_work(struct callback_head *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) unsigned long migrate, next_scan, now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) struct task_struct *p = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) struct mm_struct *mm = p->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) u64 runtime = p->se.sum_exec_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) unsigned long start, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) unsigned long nr_pte_updates = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) long pages, virtpages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) work->next = work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) * Who cares about NUMA placement when they're dying.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) * NOTE: make sure not to dereference p->mm before this check,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) * exit_task_work() happens _after_ exit_mm() so we could be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) * without p->mm even though we still had it when we enqueued this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) * work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) if (p->flags & PF_EXITING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) if (!mm->numa_next_scan) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) mm->numa_next_scan = now +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) * Enforce maximal scan/migration frequency..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) migrate = mm->numa_next_scan;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) if (time_before(now, migrate))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) if (p->numa_scan_period == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) p->numa_scan_period_max = task_scan_max(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) p->numa_scan_period = task_scan_start(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) next_scan = now + msecs_to_jiffies(p->numa_scan_period);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) if (cmpxchg(&mm->numa_next_scan, migrate, next_scan) != migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) * Delay this task enough that another task of this mm will likely win
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) * the next time around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) p->node_stamp += 2 * TICK_NSEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) start = mm->numa_scan_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) pages = sysctl_numa_balancing_scan_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) pages <<= 20 - PAGE_SHIFT; /* MB in pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) virtpages = pages * 8; /* Scan up to this much virtual space */
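/*
 * Worked example (editorial, assuming the default 256MB scan size and
 * 4KB pages): pages = 256 << 8 = 65536 and virtpages = 524288, i.e. at
 * most 2GB of virtual address space is walked in one pass.
 */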
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) if (!pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) if (!mmap_read_trylock(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) vma = find_vma(mm, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) if (!vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) reset_ptenuma_scan(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) vma = mm->mmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) for (; vma; vma = vma->vm_next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) * Shared library pages mapped by multiple processes are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) * migrated, as they are expected to be cache replicated. Avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) * hinting faults in read-only file-backed mappings or the vdso
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) * as migrating the pages will be of marginal benefit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) if (!vma->vm_mm ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) (vma->vm_file && (vma->vm_flags & (VM_READ|VM_WRITE)) == (VM_READ)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) * Skip inaccessible VMAs to avoid any confusion between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) * PROT_NONE and NUMA hinting ptes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) if (!vma_is_accessible(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) start = max(start, vma->vm_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) end = min(end, vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) nr_pte_updates = change_prot_numa(vma, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) * Try to scan sysctl_numa_balancing_scan_size worth of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) * hpages that have at least one present PTE that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) * is not already pte-numa. If the VMA contains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) * areas that are unused or already full of prot_numa
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) * PTEs, scan up to virtpages, to skip through those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) * areas faster.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) if (nr_pte_updates)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) pages -= (end - start) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) virtpages -= (end - start) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) start = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) if (pages <= 0 || virtpages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) } while (end != vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) * It is possible to reach the end of the VMA list but the last few
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) * VMAs are not guaranteed to be migratable. If they are not, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) * would find the !migratable VMA on the next scan but not reset the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) * scanner to the start so check it now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) if (vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) mm->numa_scan_offset = start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) reset_ptenuma_scan(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) * Make sure tasks use at least 32x as much time to run other code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) * as they used here, to limit NUMA PTE scanning overhead to 3% max.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) * Usually update_task_scan_period slows down scanning enough; on an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) * overloaded system we need to limit overhead on a per-task basis.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) */
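/*
 * Editorial example: if this pass itself consumed 5ms of runtime, the
 * node_stamp bump below pushes the next scan out by an extra
 * 32 * 5ms = 160ms of task runtime.
 */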
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) if (unlikely(p->se.sum_exec_runtime != runtime)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) u64 diff = p->se.sum_exec_runtime - runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) p->node_stamp += 32 * diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) void init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) int mm_users = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) struct mm_struct *mm = p->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) if (mm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) mm_users = atomic_read(&mm->mm_users);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) if (mm_users == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) mm->numa_next_scan = jiffies + msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) mm->numa_scan_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) p->node_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) p->numa_scan_seq = mm ? mm->numa_scan_seq : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) p->numa_scan_period = sysctl_numa_balancing_scan_delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) /* Protect against double add, see task_tick_numa and task_numa_work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) p->numa_work.next = &p->numa_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) p->numa_faults = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) RCU_INIT_POINTER(p->numa_group, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) p->last_task_numa_placement = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) p->last_sum_exec_runtime = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) init_task_work(&p->numa_work, task_numa_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) /* New address space, reset the preferred nid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) if (!(clone_flags & CLONE_VM)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) p->numa_preferred_nid = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) * New thread: keep the existing numa_preferred_nid, which should already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) * have been copied by arch_dup_task_struct, but stagger when scans start.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) if (mm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) unsigned int delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) delay = min_t(unsigned int, task_scan_max(current),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) current->numa_scan_period * mm_users * NSEC_PER_MSEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) delay += 2 * TICK_NSEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) p->node_stamp = delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) * Drive the periodic memory faults..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) static void task_tick_numa(struct rq *rq, struct task_struct *curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) struct callback_head *work = &curr->numa_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) u64 period, now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) * We don't care about NUMA placement if we don't have memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) if ((curr->flags & (PF_EXITING | PF_KTHREAD)) || work->next != work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) * Using runtime rather than walltime has the dual advantage that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) * we (mostly) drive the selection from busy threads and that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) * task needs to have done some actual work before we bother with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) * NUMA placement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) now = curr->se.sum_exec_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) period = (u64)curr->numa_scan_period * NSEC_PER_MSEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) if (now > curr->node_stamp + period) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) if (!curr->node_stamp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) curr->numa_scan_period = task_scan_start(curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) curr->node_stamp += period;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) if (!time_before(jiffies, curr->mm->numa_next_scan))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) task_work_add(curr, work, TWA_RESUME);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) static void update_scan_period(struct task_struct *p, int new_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) int src_nid = cpu_to_node(task_cpu(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) int dst_nid = cpu_to_node(new_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) if (!static_branch_likely(&sched_numa_balancing))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) if (!p->mm || !p->numa_faults || (p->flags & PF_EXITING))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) if (src_nid == dst_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) * Allow resets if faults have been trapped before one scan
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) * has completed. This is most likely due to a new task that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) * is pulled cross-node due to wakeups or load balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) if (p->numa_scan_seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) * Avoid scan adjustments if moving to the preferred
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) * node or if the task was not previously running on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) * the preferred node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) if (dst_nid == p->numa_preferred_nid ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) (p->numa_preferred_nid != NUMA_NO_NODE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) src_nid != p->numa_preferred_nid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) p->numa_scan_period = task_scan_start(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) static void task_tick_numa(struct rq *rq, struct task_struct *curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) static inline void account_numa_enqueue(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) static inline void account_numa_dequeue(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) static inline void update_scan_period(struct task_struct *p, int new_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) #endif /* CONFIG_NUMA_BALANCING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) account_entity_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) update_load_add(&cfs_rq->load, se->load.weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) if (entity_is_task(se)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) struct rq *rq = rq_of(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) account_numa_enqueue(rq, task_of(se));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) list_add(&se->group_node, &rq->cfs_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) cfs_rq->nr_running++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) update_load_sub(&cfs_rq->load, se->load.weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) if (entity_is_task(se)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) account_numa_dequeue(rq_of(cfs_rq), task_of(se));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) list_del_init(&se->group_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) cfs_rq->nr_running--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) * Signed add and clamp on underflow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) * Explicitly do a load-store to ensure the intermediate value never hits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) * memory. This allows lockless observations without ever seeing the negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) * values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) #define add_positive(_ptr, _val) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) typeof(_ptr) ptr = (_ptr); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) typeof(_val) val = (_val); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) typeof(*ptr) res, var = READ_ONCE(*ptr); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) res = var + val; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) if (val < 0 && res > var) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) res = 0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) WRITE_ONCE(*ptr, res); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) } while (0)
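/*
 * Usage sketch (editorial): with an unsigned long load_avg of 3,
 * add_positive(&load_avg, -5) computes a wrapped result larger than the
 * old value, detects the underflow and stores 0 instead.
 */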
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) * Unsigned subtract and clamp on underflow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) * Explicitly do a load-store to ensure the intermediate value never hits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) * memory. This allows lockless observations without ever seeing the negative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) * values.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) #define sub_positive(_ptr, _val) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) typeof(_ptr) ptr = (_ptr); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) typeof(*ptr) val = (_val); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) typeof(*ptr) res, var = READ_ONCE(*ptr); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) res = var - val; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) if (res > var) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) res = 0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) WRITE_ONCE(*ptr, res); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) } while (0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) * Remove and clamp on negative, from a local variable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) * A variant of sub_positive(), which does not use explicit load-store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) * and is thus optimized for local variable updates.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) #define lsub_positive(_ptr, _val) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) typeof(_ptr) ptr = (_ptr); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) *ptr -= min_t(typeof(*ptr), *ptr, _val); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) } while (0)
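/*
 * Editorial note: sub_positive() keeps the READ_ONCE()/WRITE_ONCE()
 * load-store pair so that lockless readers of shared fields (e.g.
 * cfs_rq->avg) never observe a wrapped negative value, while
 * lsub_positive() skips that machinery for plain local variables:
 *
 *	unsigned long cap = 100;
 *	lsub_positive(&cap, 150);	-> cap is clamped to 0
 */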
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) cfs_rq->avg.load_avg += se->avg.load_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) cfs_rq->avg.load_sum += se_weight(se) * se->avg.load_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) sub_positive(&cfs_rq->avg.load_avg, se->avg.load_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) sub_positive(&cfs_rq->avg.load_sum, se_weight(se) * se->avg.load_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) enqueue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) dequeue_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) unsigned long weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) if (se->on_rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) /* commit outstanding execution time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) if (cfs_rq->curr == se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) update_curr(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) update_load_sub(&cfs_rq->load, se->load.weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) dequeue_load_avg(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) update_load_set(&se->load, weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) u32 divider = get_pelt_divider(&se->avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) se->avg.load_avg = div_u64(se_weight(se) * se->avg.load_sum, divider);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) } while (0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) enqueue_load_avg(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) if (se->on_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) update_load_add(&cfs_rq->load, se->load.weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) void reweight_task(struct task_struct *p, int prio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) struct load_weight *load = &se->load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) unsigned long weight = scale_load(sched_prio_to_weight[prio]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) reweight_entity(cfs_rq, se, weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) load->inv_weight = sched_prio_to_wmult[prio];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) * All this does is approximate the hierarchical proportion which includes that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) * global sum we all love to hate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) * That is, the weight of a group entity, is the proportional share of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) * group weight based on the group runqueue weights. That is:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) * tg->weight * grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) * ge->load.weight = ----------------------------- (1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) * \Sum grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) * Now, because that sum is prohibitively expensive to compute (been there,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) * done that), we approximate it with this average stuff. The average
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) * moves slower and therefore the approximation is cheaper and more stable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) * So instead of the above, we substitute:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) * grq->load.weight -> grq->avg.load_avg (2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) * which yields the following:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) * tg->weight * grq->avg.load_avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) * ge->load.weight = ------------------------------ (3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) * tg->load_avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) * Where: tg->load_avg ~= \Sum grq->avg.load_avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) * That is shares_avg, and it is right (given the approximation (2)).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) * The problem with it is that because the average is slow -- it was designed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) * to be exactly that of course -- this leads to transients in boundary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) * conditions. Specifically, the case where the group was idle and we start the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) * one task. It takes time for our CPU's grq->avg.load_avg to build up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) * yielding bad latency etc..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) * Now, in that special case (1) reduces to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) * tg->weight * grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) * ge->load.weight = ----------------------------- = tg->weight (4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) * grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) * That is, the sum collapses because all other CPUs are idle; the UP scenario.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) * So what we do is modify our approximation (3) to approach (4) in the (near)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) * UP case, like:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) * ge->load.weight =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) * tg->weight * grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) * --------------------------------------------------- (5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) * tg->load_avg - grq->avg.load_avg + grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) * But because grq->load.weight can drop to 0, resulting in a divide by zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) * we need to use grq->avg.load_avg as its lower bound, which then gives:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) * tg->weight * grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) * ge->load.weight = ----------------------------- (6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) * tg_load_avg'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) * Where:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) * tg_load_avg' = tg->load_avg - grq->avg.load_avg +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) * max(grq->load.weight, grq->avg.load_avg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) * And that is shares_weight and is icky. In the (near) UP case it approaches
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) * (4) while in the normal case it approaches (3). It consistently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) * overestimates the ge->load.weight and therefore:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) * \Sum ge->load.weight >= tg->weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) * hence icky!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) static long calc_group_shares(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) long tg_weight, tg_shares, load, shares;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) struct task_group *tg = cfs_rq->tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) tg_shares = READ_ONCE(tg->shares);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) load = max(scale_load_down(cfs_rq->load.weight), cfs_rq->avg.load_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) tg_weight = atomic_long_read(&tg->load_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) /* Ensure tg_weight >= load */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) tg_weight -= cfs_rq->tg_load_avg_contrib;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) tg_weight += load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) shares = (tg_shares * load);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) if (tg_weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) shares /= tg_weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) * MIN_SHARES has to be unscaled here to support per-CPU partitioning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) * of a group with small tg->shares value. It is a floor value which is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) * assigned as a minimum load.weight to the sched_entity representing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) * the group on a CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) * E.g. on 64-bit, for a group with tg->shares of scale_load(15)=15*1024
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) * on an 8-core system with 8 tasks each runnable on one CPU, shares has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) * to be 15*1024*1/8=1920 instead of scale_load(MIN_SHARES)=2*1024. In
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) * case no task is runnable on a CPU, MIN_SHARES=2 should be returned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) * instead of 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) return clamp_t(long, shares, MIN_SHARES, tg_shares);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) }
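
/*
 * Worked example with made-up numbers (fixed-point scaling ignored for
 * readability): tg->shares = 1024, this CPU's grq has load.weight = 512 and
 * avg.load_avg = 512, and the group-wide tg->load_avg = 2048, of which this
 * CPU's tg_load_avg_contrib is 512. Then, per (6):
 *
 *   tg_weight = 2048 - 512 + max(512, 512) = 2048
 *   shares    = 1024 * 512 / 2048          = 256
 *
 * so this CPU's group entity gets a quarter of the group weight, matching
 * its quarter share of the group-wide load, clamped to [MIN_SHARES, 1024].
 */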
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) #endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) static inline int throttled_hierarchy(struct cfs_rq *cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) * Recomputes the group entity based on the current state of its group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) * runqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) static void update_cfs_group(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) struct cfs_rq *gcfs_rq = group_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) long shares;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) if (!gcfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) if (throttled_hierarchy(gcfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) #ifndef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) shares = READ_ONCE(gcfs_rq->tg->shares);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) if (likely(se->load.weight == shares))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) shares = calc_group_shares(gcfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) reweight_entity(cfs_rq_of(se), se, shares);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) #else /* CONFIG_FAIR_GROUP_SCHED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) static inline void update_cfs_group(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) #endif /* CONFIG_FAIR_GROUP_SCHED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) struct rq *rq = rq_of(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) if (&rq->cfs == cfs_rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) * There are a few boundary cases this might miss but it should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) * get called often enough that that should (hopefully) not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) * a real problem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) * It will not get called when we go idle, because the idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) * thread is a different class (!fair), nor will the utilization
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) * number include things like RT tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) * As is, the util number is not freq-invariant (we'd have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) * implement arch_scale_freq_capacity() for that).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) * See cpu_util().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) cpufreq_update_util(rq, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) * update_tg_load_avg - update the tg's load avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) * @cfs_rq: the cfs_rq whose avg changed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) * This function 'ensures': tg->load_avg := \Sum tg->cfs_rq[]->avg.load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) * However, because tg->load_avg is a global value there are performance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) * considerations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) * In order to avoid having to look at the other cfs_rq's, we use a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) * differential update where we store the last value we propagated. This in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) * turn allows skipping updates if the differential is 'small'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) * Updating tg's load_avg is necessary before update_cfs_group().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) static inline void update_tg_load_avg(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) long delta = cfs_rq->avg.load_avg - cfs_rq->tg_load_avg_contrib;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) * No need to update load_avg for root_task_group as it is not used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) if (cfs_rq->tg == &root_task_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) if (abs(delta) > cfs_rq->tg_load_avg_contrib / 64) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) atomic_long_add(delta, &cfs_rq->tg->load_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) cfs_rq->tg_load_avg_contrib = cfs_rq->avg.load_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) }
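
/*
 * Illustrative numbers: with tg_load_avg_contrib == 6400, only a delta whose
 * absolute value exceeds 100 (i.e. more than ~1.5% of the last propagated
 * value) is folded into the shared tg->load_avg; smaller changes are left to
 * accumulate, which keeps cross-CPU atomic updates of tg->load_avg rare.
 */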
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) * Called within set_task_rq() right before setting a task's CPU. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) * caller only guarantees p->pi_lock is held; no other assumptions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) * including the state of rq->lock, should be made.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) void set_task_rq_fair(struct sched_entity *se,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) struct cfs_rq *prev, struct cfs_rq *next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) u64 p_last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) u64 n_last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) if (!sched_feat(ATTACH_AGE_LOAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) * We are supposed to update the task to "current" time, so that it is up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) * date and ready to go to the new CPU/cfs_rq. But we have difficulty in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) * determining what the current time is, so simply throw away the out-of-date
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) * time. This results in the wakee task being less decayed, but giving
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) * the wakee more load is not a bad thing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) if (!(se->avg.last_update_time && prev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) #ifndef CONFIG_64BIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) u64 p_last_update_time_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) u64 n_last_update_time_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) p_last_update_time_copy = prev->load_last_update_time_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) n_last_update_time_copy = next->load_last_update_time_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) p_last_update_time = prev->avg.last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) n_last_update_time = next->avg.last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) } while (p_last_update_time != p_last_update_time_copy ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) n_last_update_time != n_last_update_time_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) p_last_update_time = prev->avg.last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) n_last_update_time = next->avg.last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) __update_load_avg_blocked_se(p_last_update_time, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) se->avg.last_update_time = n_last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) * When on migration a sched_entity joins/leaves the PELT hierarchy, we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) * propagate its contribution. The key to this propagation is the invariant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) * that for each group:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) * ge->avg == grq->avg (1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) * _IFF_ we look at the pure running and runnable sums. Because they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) * represent the very same entity, just at different points in the hierarchy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) * Per the above, update_tg_cfs_util() and update_tg_cfs_runnable() are trivial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) * and simply copy the running/runnable sum over (but still wrong, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) * the group entity and group rq do not have their PELT windows aligned).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) * However, update_tg_cfs_load() is more complex. So we have:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) * ge->avg.load_avg = ge->load.weight * ge->avg.runnable_avg (2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) * And since, like util, the runnable part should be directly transferable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) * the following would _appear_ to be the straightforward approach:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) * grq->avg.load_avg = grq->load.weight * grq->avg.runnable_avg (3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) * And per (1) we have:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) * ge->avg.runnable_avg == grq->avg.runnable_avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) * Which gives:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) * ge->load.weight * grq->avg.load_avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) * ge->avg.load_avg = ----------------------------------- (4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) * grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) * Except that is wrong!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) * Because while for entities historical weight is not important and we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) * really only care about our future and therefore can consider a pure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) * runnable sum, runqueues can NOT do this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) * We specifically want runqueues to have a load_avg that includes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) * historical weights. Those represent the blocked load, the load we expect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) * to (shortly) return to us. This only works by keeping the weights as an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) * integral part of the sum. We therefore cannot decompose as per (3).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) * Another reason this doesn't work is that runnable isn't a 0-sum entity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) * Imagine a rq with 2 tasks, each of which is runnable 2/3 of the time. Then the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) * rq itself is runnable anywhere between 2/3 and 1 depending on how the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) * runnable sections of these tasks overlap (or not). If they were to perfectly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) * align, the rq as a whole would be runnable 2/3 of the time. If however we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) * always have at least 1 runnable task, the rq as a whole is always runnable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) * So we'll have to approximate.. :/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) * Given the constraint:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) * ge->avg.running_sum <= ge->avg.runnable_sum <= LOAD_AVG_MAX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) * We can construct a rule that adds runnable to a rq by assuming minimal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) * overlap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) * On removal, we'll assume each task is equally runnable, which yields:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) * grq->avg.runnable_sum = grq->avg.load_sum / grq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) * XXX: only do this for the part of runnable > running ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) update_tg_cfs_util(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) long delta = gcfs_rq->avg.util_avg - se->avg.util_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) u32 divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) /* Nothing to update */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) if (!delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) * See ___update_load_avg() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) divider = get_pelt_divider(&cfs_rq->avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) /* Set new sched_entity's utilization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) se->avg.util_avg = gcfs_rq->avg.util_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) se->avg.util_sum = se->avg.util_avg * divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) /* Update parent cfs_rq utilization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) add_positive(&cfs_rq->avg.util_avg, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) cfs_rq->avg.util_sum = cfs_rq->avg.util_avg * divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) update_tg_cfs_runnable(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) long delta = gcfs_rq->avg.runnable_avg - se->avg.runnable_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) u32 divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) /* Nothing to update */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) if (!delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) * See ___update_load_avg() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) divider = get_pelt_divider(&cfs_rq->avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) /* Set new sched_entity's runnable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) se->avg.runnable_avg = gcfs_rq->avg.runnable_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) se->avg.runnable_sum = se->avg.runnable_avg * divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) /* Update parent cfs_rq runnable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) add_positive(&cfs_rq->avg.runnable_avg, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) cfs_rq->avg.runnable_sum = cfs_rq->avg.runnable_avg * divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) update_tg_cfs_load(struct cfs_rq *cfs_rq, struct sched_entity *se, struct cfs_rq *gcfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) long delta_avg, running_sum, runnable_sum = gcfs_rq->prop_runnable_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) unsigned long load_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) u64 load_sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) s64 delta_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) u32 divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) if (!runnable_sum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) gcfs_rq->prop_runnable_sum = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) * See ___update_load_avg() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) divider = get_pelt_divider(&cfs_rq->avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) if (runnable_sum >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) * Add runnable; clip at LOAD_AVG_MAX. Reflects that until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) * the CPU is saturated running == runnable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) runnable_sum += se->avg.load_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) runnable_sum = min_t(long, runnable_sum, divider);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) * Estimate the new unweighted runnable_sum of the gcfs_rq by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) * assuming all tasks are equally runnable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) if (scale_load_down(gcfs_rq->load.weight)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) load_sum = div_s64(gcfs_rq->avg.load_sum,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) scale_load_down(gcfs_rq->load.weight));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) /* But make sure to not inflate se's runnable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) runnable_sum = min(se->avg.load_sum, load_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) * runnable_sum can't be lower than running_sum.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) * Rescale running_sum to be in the same range as runnable_sum:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) * running_sum is in [0 : LOAD_AVG_MAX << SCHED_CAPACITY_SHIFT],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) * runnable_sum is in [0 : LOAD_AVG_MAX].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) running_sum = se->avg.util_sum >> SCHED_CAPACITY_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) runnable_sum = max(runnable_sum, running_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) load_sum = (s64)se_weight(se) * runnable_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) load_avg = div_s64(load_sum, divider);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) delta_sum = load_sum - (s64)se_weight(se) * se->avg.load_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) delta_avg = load_avg - se->avg.load_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) se->avg.load_sum = runnable_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) se->avg.load_avg = load_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) add_positive(&cfs_rq->avg.load_avg, delta_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) add_positive(&cfs_rq->avg.load_sum, delta_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) }
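
/*
 * Illustrative numbers for the removal branch above: with a group rq whose
 * scale_load_down(load.weight) == 2048 and avg.load_sum == 40960000, the
 * "equally runnable" estimate is 40960000 / 2048 = 20000; if the group
 * entity's own load_sum is 24000, the new runnable_sum is min(24000, 20000)
 * = 20000, i.e. the estimate is clipped so it never inflates the group
 * entity's own runnable.
 */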
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) cfs_rq->propagate = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) cfs_rq->prop_runnable_sum += runnable_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) /* Update task and its cfs_rq load average */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) static inline int propagate_entity_load_avg(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) struct cfs_rq *cfs_rq, *gcfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) if (entity_is_task(se))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) gcfs_rq = group_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) if (!gcfs_rq->propagate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) gcfs_rq->propagate = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) add_tg_cfs_propagate(cfs_rq, gcfs_rq->prop_runnable_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) update_tg_cfs_util(cfs_rq, se, gcfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) update_tg_cfs_runnable(cfs_rq, se, gcfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) update_tg_cfs_load(cfs_rq, se, gcfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) trace_pelt_cfs_tp(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) trace_pelt_se_tp(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) * Check if we need to update the load and the utilization of a blocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) * group_entity:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) static inline bool skip_blocked_update(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) struct cfs_rq *gcfs_rq = group_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) * If the sched_entity still has non-zero load or utilization, we have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) * decay it:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) if (se->avg.load_avg || se->avg.util_avg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) * If there is a pending propagation, we have to update the load and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) * the utilization of the sched_entity:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) if (gcfs_rq->propagate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) * Otherwise, the load and the utilization of the sched_entity are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) * already zero and there is no pending propagation, so it will be a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) * waste of time to try to decay it:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) #else /* CONFIG_FAIR_GROUP_SCHED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) static inline void update_tg_load_avg(struct cfs_rq *cfs_rq) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) static inline int propagate_entity_load_avg(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) static inline void add_tg_cfs_propagate(struct cfs_rq *cfs_rq, long runnable_sum) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) #endif /* CONFIG_FAIR_GROUP_SCHED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) * update_cfs_rq_load_avg - update the cfs_rq's load/util averages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) * @now: current time, as per cfs_rq_clock_pelt()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) * @cfs_rq: cfs_rq to update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) * The cfs_rq avg is the direct sum of all its entities (blocked and runnable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) * avg. The immediate corollary is that all (fair) tasks must be attached, see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) * post_init_entity_util_avg().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657) * cfs_rq->avg is used by task_h_load() and update_cfs_group(), for example.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) * Returns true if the load decayed or we removed load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) * Since both these conditions indicate a changed cfs_rq->avg.load we should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) * call update_tg_load_avg() when this function returns true.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) unsigned long removed_load = 0, removed_util = 0, removed_runnable = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) struct sched_avg *sa = &cfs_rq->avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) int decayed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) if (cfs_rq->removed.nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) unsigned long r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) u32 divider = get_pelt_divider(&cfs_rq->avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) raw_spin_lock(&cfs_rq->removed.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) swap(cfs_rq->removed.util_avg, removed_util);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) swap(cfs_rq->removed.load_avg, removed_load);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) swap(cfs_rq->removed.runnable_avg, removed_runnable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) cfs_rq->removed.nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) raw_spin_unlock(&cfs_rq->removed.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) r = removed_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) sub_positive(&sa->load_avg, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) sub_positive(&sa->load_sum, r * divider);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) r = removed_util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) sub_positive(&sa->util_avg, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) sub_positive(&sa->util_sum, r * divider);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) * Because of rounding, se->util_sum might end up being +1 more than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) * cfs->util_sum. Although this is not a problem by itself, detaching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) * a lot of tasks with this rounding problem between 2 updates of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) * util_avg (~1ms) can make cfs->util_sum become zero whereas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) * cfs->util_avg is not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) * Check that util_sum is still above its lower bound for the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) * util_avg. Given that period_contrib might have moved since the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) * sync, we are only sure that util_sum must be above or equal to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) * util_avg * minimum possible divider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) sa->util_sum = max_t(u32, sa->util_sum, sa->util_avg * PELT_MIN_DIVIDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) r = removed_runnable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) sub_positive(&sa->runnable_avg, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) sub_positive(&sa->runnable_sum, r * divider);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) * removed_runnable is the unweighted version of removed_load so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) * can use it to estimate removed_load_sum.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) add_tg_cfs_propagate(cfs_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) -(long)(removed_runnable * divider) >> SCHED_CAPACITY_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) decayed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) decayed |= __update_load_avg_cfs_rq(now, cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) #ifndef CONFIG_64BIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) cfs_rq->load_last_update_time_copy = sa->last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) return decayed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) }
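
/*
 * Illustrative numbers for the removed-load handling above: detaching a task
 * with util_avg == 256 while the divider is ~47000 subtracts 256 from the
 * cfs_rq's util_avg and roughly 12M (256 * 47000) from its util_sum; the
 * max_t() afterwards keeps util_sum at no less than
 * util_avg * PELT_MIN_DIVIDER, so rounding cannot leave a zero util_sum
 * behind a non-zero util_avg.
 */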
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) * attach_entity_load_avg - attach this entity to its cfs_rq load avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) * @cfs_rq: cfs_rq to attach to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) * @se: sched_entity to attach
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) * Must call update_cfs_rq_load_avg() before this, since we rely on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) * cfs_rq->avg.last_update_time being current.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) static void attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) * cfs_rq->avg.period_contrib can be used for both cfs_rq and se.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) * See ___update_load_avg() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) u32 divider = get_pelt_divider(&cfs_rq->avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) * When we attach the @se to the @cfs_rq, we must align the decay
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) * window because without that, really weird and wonderful things can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) * happen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) * XXX illustrate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) se->avg.last_update_time = cfs_rq->avg.last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) se->avg.period_contrib = cfs_rq->avg.period_contrib;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) * Hell(o) Nasty stuff.. we need to recompute _sum based on the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) * period_contrib. This isn't strictly correct, but since we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) * entirely outside of the PELT hierarchy, nobody cares if we truncate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) * _sum a little.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) se->avg.util_sum = se->avg.util_avg * divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) se->avg.runnable_sum = se->avg.runnable_avg * divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) se->avg.load_sum = divider;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) if (se_weight(se)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) se->avg.load_sum =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) div_u64(se->avg.load_avg * se->avg.load_sum, se_weight(se));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) enqueue_load_avg(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) cfs_rq->avg.util_avg += se->avg.util_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) cfs_rq->avg.util_sum += se->avg.util_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) cfs_rq->avg.runnable_avg += se->avg.runnable_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) cfs_rq->avg.runnable_sum += se->avg.runnable_sum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) add_tg_cfs_propagate(cfs_rq, se->avg.load_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) cfs_rq_util_change(cfs_rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) trace_pelt_cfs_tp(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) }
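
/*
 * Illustrative numbers for the _sum rebuild above: with se_weight(se) ==
 * 1024, se->avg.load_avg == 512 and a divider of about 47000, the recomputed
 * load_sum is 512 * 47000 / 1024, about 23500, i.e. half the divider for a
 * half-loaded entity; util_sum and runnable_sum are simply their averages
 * scaled by that same divider.
 */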
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) * detach_entity_load_avg - detach this entity from its cfs_rq load avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) * @cfs_rq: cfs_rq to detach from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) * @se: sched_entity to detach
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) * Must call update_cfs_rq_load_avg() before this, since we rely on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) * cfs_rq->avg.last_update_time being current.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) static void detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) dequeue_load_avg(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) sub_positive(&cfs_rq->avg.util_avg, se->avg.util_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) sub_positive(&cfs_rq->avg.util_sum, se->avg.util_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) sub_positive(&cfs_rq->avg.runnable_avg, se->avg.runnable_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) sub_positive(&cfs_rq->avg.runnable_sum, se->avg.runnable_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) add_tg_cfs_propagate(cfs_rq, -se->avg.load_sum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) cfs_rq_util_change(cfs_rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) trace_pelt_cfs_tp(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) * Optional action to be done while updating the load average
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) #define UPDATE_TG 0x1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) #define SKIP_AGE_LOAD 0x2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) #define DO_ATTACH 0x4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) /* Update task and its cfs_rq load average */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) u64 now = cfs_rq_clock_pelt(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) int decayed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) trace_android_vh_prepare_update_load_avg_se(se, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) * Track task load average for carrying it to its new CPU after it migrates,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) * and track group sched_entity load average for task_h_load() calc in migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) if (se->avg.last_update_time && !(flags & SKIP_AGE_LOAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) __update_load_avg_se(now, cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) trace_android_vh_finish_update_load_avg_se(se, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) decayed = update_cfs_rq_load_avg(now, cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) decayed |= propagate_entity_load_avg(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) if (!se->avg.last_update_time && (flags & DO_ATTACH)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) * DO_ATTACH means we're here from enqueue_entity().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) * !last_update_time means we've passed through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) * migrate_task_rq_fair() indicating we migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) * IOW we're enqueueing a task on a new CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) attach_entity_load_avg(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) update_tg_load_avg(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) } else if (decayed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) cfs_rq_util_change(cfs_rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) if (flags & UPDATE_TG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) update_tg_load_avg(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) }
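
/*
 * Illustrative flow: a task that went through migrate_task_rq_fair() arrives
 * at enqueue with se->avg.last_update_time == 0, so an update_load_avg() call
 * carrying DO_ATTACH skips the ageing step, attaches the entity's averages to
 * the new cfs_rq and refreshes tg->load_avg in one go; subsequent updates on
 * that CPU typically only need UPDATE_TG to keep tg->load_avg in sync.
 */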
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) #ifndef CONFIG_64BIT
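/*
 * On 32-bit, the u64 last_update_time cannot be read atomically, so pair it
 * with load_last_update_time_copy, which update_cfs_rq_load_avg() writes
 * after an smp_wmb(): read the copy first, then the value, and retry until
 * the two agree, giving a tear-free snapshot without taking the rq lock.
 */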
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) u64 last_update_time_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) u64 last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) last_update_time_copy = cfs_rq->load_last_update_time_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) last_update_time = cfs_rq->avg.last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) } while (last_update_time != last_update_time_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) return last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) static inline u64 cfs_rq_last_update_time(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) return cfs_rq->avg.last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) * Synchronize entity load avg of dequeued entity without locking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) * the previous rq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) static void sync_entity_load_avg(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878) u64 last_update_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) last_update_time = cfs_rq_last_update_time(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) trace_android_vh_prepare_update_load_avg_se(se, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) __update_load_avg_blocked_se(last_update_time, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) trace_android_vh_finish_update_load_avg_se(se, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) * Task first catches up with cfs_rq, and then subtracts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) * itself from the cfs_rq (task must be off the queue now).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) static void remove_entity_load_avg(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) * Tasks cannot exit without having gone through wake_up_new_task() ->
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) * post_init_entity_util_avg() which will have added things to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) * cfs_rq, so we can remove unconditionally.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) sync_entity_load_avg(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) raw_spin_lock_irqsave(&cfs_rq->removed.lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) ++cfs_rq->removed.nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) cfs_rq->removed.util_avg += se->avg.util_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) cfs_rq->removed.load_avg += se->avg.load_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) cfs_rq->removed.runnable_avg += se->avg.runnable_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) raw_spin_unlock_irqrestore(&cfs_rq->removed.lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) static inline unsigned long cfs_rq_runnable_avg(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) return cfs_rq->avg.runnable_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) static inline unsigned long cfs_rq_load_avg(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) return cfs_rq->avg.load_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) static int newidle_balance(struct rq *this_rq, struct rq_flags *rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) static inline unsigned long task_util(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) return READ_ONCE(p->se.avg.util_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) static inline unsigned long _task_util_est(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) struct util_est ue = READ_ONCE(p->se.avg.util_est);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) return max(ue.ewma, (ue.enqueued & ~UTIL_AVG_UNCHANGED));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) static inline unsigned long task_util_est(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) return max(task_util(p), _task_util_est(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) #ifdef CONFIG_UCLAMP_TASK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) static inline unsigned long uclamp_task_util(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) return clamp(task_util_est(p),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) uclamp_eff_value(p, UCLAMP_MIN),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) uclamp_eff_value(p, UCLAMP_MAX));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) static inline unsigned long uclamp_task_util(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) return task_util_est(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) static inline void util_est_enqueue(struct cfs_rq *cfs_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) unsigned int enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) if (!sched_feat(UTIL_EST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) /* Update root cfs_rq's estimated utilization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) enqueued = cfs_rq->avg.util_est.enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) enqueued += _task_util_est(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) trace_sched_util_est_cfs_tp(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) static inline void util_est_dequeue(struct cfs_rq *cfs_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) unsigned int enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) if (!sched_feat(UTIL_EST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) /* Update root cfs_rq's estimated utilization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) enqueued = cfs_rq->avg.util_est.enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) enqueued -= min_t(unsigned int, enqueued, _task_util_est(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) WRITE_ONCE(cfs_rq->avg.util_est.enqueued, enqueued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) trace_sched_util_est_cfs_tp(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) #define UTIL_EST_MARGIN (SCHED_CAPACITY_SCALE / 100)
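/*
 * With SCHED_CAPACITY_SCALE == 1024 this evaluates to 10, i.e. roughly 1%
 * of the full capacity scale; this is the "close enough" threshold used by
 * util_est_update() below.
 */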
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) * Check if a (signed) value is within a specified (unsigned) margin,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) * based on the observation that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) * abs(x) < y := (unsigned)(x + y - 1) < (2 * y - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) * NOTE: this only works when value + margin < INT_MAX.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) static inline bool within_margin(int value, int margin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) return ((unsigned int)(value + margin - 1) < (2 * margin - 1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) }
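/*
 * A quick sanity check of the trick above, with margin = 10 (illustrative):
 *
 *   value = 3   -> (unsigned)(3 + 9)   = 12       < 19 -> true
 *   value = -9  -> (unsigned)(-9 + 9)  = 0        < 19 -> true
 *   value = 10  -> (unsigned)(10 + 9)  = 19       < 19 -> false
 *   value = -10 -> (unsigned)(-10 + 9) = UINT_MAX < 19 -> false
 *
 * so a single unsigned compare matches abs(value) < margin without
 * branching on the sign of value.
 */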
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) static inline void util_est_update(struct cfs_rq *cfs_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) struct task_struct *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) bool task_sleep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) long last_ewma_diff, last_enqueued_diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) struct util_est ue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) trace_android_rvh_util_est_update(cfs_rq, p, task_sleep, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) if (!sched_feat(UTIL_EST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) * Skip update of task's estimated utilization when the task has not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) * yet completed an activation, e.g. being migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) if (!task_sleep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) * If the PELT values haven't changed since enqueue time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) * skip the util_est update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) ue = p->se.avg.util_est;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) if (ue.enqueued & UTIL_AVG_UNCHANGED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) last_enqueued_diff = ue.enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) * Reset EWMA on utilization increases; the moving average is used only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) * to smooth utilization decreases.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) ue.enqueued = task_util(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) if (sched_feat(UTIL_EST_FASTUP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) if (ue.ewma < ue.enqueued) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) ue.ewma = ue.enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) * Skip update of task's estimated utilization when both its members
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) * (enqueued and ewma) are already within ~1% of its last activation value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) last_ewma_diff = ue.enqueued - ue.ewma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) last_enqueued_diff -= ue.enqueued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) if (within_margin(last_ewma_diff, UTIL_EST_MARGIN)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) if (!within_margin(last_enqueued_diff, UTIL_EST_MARGIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) * To avoid overestimation of actual task utilization, skip updates if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) * we cannot guarantee there is idle time on this CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) if (task_util(p) > capacity_orig_of(cpu_of(rq_of(cfs_rq))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) * Update Task's estimated utilization
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) * When *p completes an activation we can consolidate another sample
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) * of the task size. This is done by storing the current PELT value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) * as ue.enqueued and by using this value to update the Exponential
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) * Weighted Moving Average (EWMA):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) * ewma(t) = w * task_util(p) + (1-w) * ewma(t-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) * = w * task_util(p) + ewma(t-1) - w * ewma(t-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) * = w * (task_util(p) - ewma(t-1)) + ewma(t-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) * = w * ( last_ewma_diff ) + ewma(t-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) * = w * (last_ewma_diff + ewma(t-1) / w)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) * Where 'w' is the weight of new samples, which is configured to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) * 0.25, thus making w=1/4 ( >>= UTIL_EST_WEIGHT_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) */
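	/*
	 * Worked example (illustrative values): with UTIL_EST_WEIGHT_SHIFT == 2
	 * (w = 1/4), a previous ewma of 400 and a new sample ue.enqueued of 336
	 * give last_ewma_diff = -64, so the fixed-point update below computes
	 * ((400 << 2) + (-64)) >> 2 = (1600 - 64) >> 2 = 384, i.e. the average
	 * decays a quarter of the way towards the new, lower sample.
	 */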
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) ue.ewma <<= UTIL_EST_WEIGHT_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) ue.ewma += last_ewma_diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) ue.ewma >>= UTIL_EST_WEIGHT_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) ue.enqueued |= UTIL_AVG_UNCHANGED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) WRITE_ONCE(p->se.avg.util_est, ue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) trace_sched_util_est_se_tp(&p->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) static inline int task_fits_capacity(struct task_struct *p, long capacity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) return fits_capacity(uclamp_task_util(p), capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) }
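/*
 * Note: fits_capacity() is not a plain "<=" comparison; it requires the
 * clamped utilization to leave some headroom (about 20% in current kernels),
 * so a task that would nearly saturate a CPU is not considered to fit.
 */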
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) static inline void update_misfit_status(struct task_struct *p, struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) bool need_update = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) trace_android_rvh_update_misfit_status(p, rq, &need_update);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) if (!static_branch_unlikely(&sched_asym_cpucapacity) || !need_update)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) if (!p || p->nr_cpus_allowed == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) rq->misfit_task_load = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) if (task_fits_capacity(p, capacity_of(cpu_of(rq)))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) rq->misfit_task_load = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) * Make sure that misfit_task_load will not be zero even if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) * task_h_load() returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) rq->misfit_task_load = max_t(unsigned long, task_h_load(p), 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) #else /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) #define UPDATE_TG 0x0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) #define SKIP_AGE_LOAD 0x0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) #define DO_ATTACH 0x0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) static inline void update_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se, int not_used1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) cfs_rq_util_change(cfs_rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) static inline void remove_entity_load_avg(struct sched_entity *se) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) attach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) detach_entity_load_avg(struct cfs_rq *cfs_rq, struct sched_entity *se) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) static inline int newidle_balance(struct rq *rq, struct rq_flags *rf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) util_est_enqueue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) util_est_dequeue(struct cfs_rq *cfs_rq, struct task_struct *p) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) util_est_update(struct cfs_rq *cfs_rq, struct task_struct *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) bool task_sleep) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) static inline void update_misfit_status(struct task_struct *p, struct rq *rq) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) #endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) #ifdef CONFIG_SCHED_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) s64 d = se->vruntime - cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) if (d < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) d = -d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) if (d > 3*sysctl_sched_latency)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) schedstat_inc(cfs_rq->nr_spread_over);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) u64 vruntime = cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) * The 'current' period is already promised to the current tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) * however, the extra weight of the new task will slow them down a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) * little, so place the new task in the slot that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) * stays open at the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) if (initial && sched_feat(START_DEBIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) vruntime += sched_vslice(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) /* sleeps up to a single latency don't count. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) if (!initial) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) unsigned long thresh = sysctl_sched_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) * Halve their sleep time's effect, to allow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) * for a gentler effect of sleepers:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) if (sched_feat(GENTLE_FAIR_SLEEPERS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) thresh >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) vruntime -= thresh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) /* ensure we never gain time by being placed backwards. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) se->vruntime = max_vruntime(se->vruntime, vruntime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) trace_android_rvh_place_entity(cfs_rq, se, initial, vruntime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) }
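/*
 * Illustration: with GENTLE_FAIR_SLEEPERS enabled, an entity waking from
 * sleep is placed at most half a sysctl_sched_latency period behind
 * min_vruntime, and the max_vruntime() clamp above guarantees that a long
 * sleep can never make its vruntime go backwards.
 */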
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) static void check_enqueue_throttle(struct cfs_rq *cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) static inline void check_schedstat_required(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) #ifdef CONFIG_SCHEDSTATS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) if (schedstat_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) /* Force schedstat enabled if a dependent tracepoint is active */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) if (trace_sched_stat_wait_enabled() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) trace_sched_stat_sleep_enabled() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) trace_sched_stat_iowait_enabled() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) trace_sched_stat_blocked_enabled() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) trace_sched_stat_runtime_enabled()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) printk_deferred_once("Scheduler tracepoints stat_sleep, stat_iowait, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) "stat_blocked and stat_runtime require the "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) "kernel parameter schedstats=enable or "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) "kernel.sched_schedstats=1\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) static inline bool cfs_bandwidth_used(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) * MIGRATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) * dequeue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) * update_curr()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) * update_min_vruntime()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) * vruntime -= min_vruntime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) * enqueue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) * update_curr()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) * update_min_vruntime()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) * vruntime += min_vruntime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) * this way the vruntime transition between RQs is done when both
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) * min_vruntime are up-to-date.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) * WAKEUP (remote)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) * ->migrate_task_rq_fair() (p->state == TASK_WAKING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) * vruntime -= min_vruntime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) * enqueue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) * update_curr()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) * update_min_vruntime()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) * vruntime += min_vruntime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) * this way we use a possibly stale min_vruntime on the originating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) * CPU but an up-to-date min_vruntime on the destination CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) */
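/*
 * In both cases the point is that vruntime is only comparable within a
 * single cfs_rq: subtracting min_vruntime on the way out and adding the
 * destination's min_vruntime on the way in re-bases the entity's vruntime
 * so it competes fairly on the new runqueue.
 */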
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) bool renorm = !(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) bool curr = cfs_rq->curr == se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) * If we're the current task, we must renormalise before calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) * update_curr().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) if (renorm && curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) se->vruntime += cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) update_curr(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) * Otherwise, renormalise after, such that we're placed at the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) * moment in time, instead of some random moment in the past. Being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) * placed in the past could significantly boost this task to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) * fairness detriment of existing tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) if (renorm && !curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) se->vruntime += cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) * When enqueuing a sched_entity, we must:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) * - Update loads to have both entity and cfs_rq synced with now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) * - Add its load to cfs_rq->runnable_avg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) * - For group_entity, update its weight to reflect the new share of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) * its group cfs_rq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) * - Add its new weight to cfs_rq->load.weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) update_load_avg(cfs_rq, se, UPDATE_TG | DO_ATTACH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) se_update_runnable(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) update_cfs_group(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) account_entity_enqueue(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) if (flags & ENQUEUE_WAKEUP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) place_entity(cfs_rq, se, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) check_schedstat_required();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) update_stats_enqueue(cfs_rq, se, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) check_spread(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) if (!curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) __enqueue_entity(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) se->on_rq = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) * When bandwidth control is enabled, cfs might have been removed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) * because a parent has been throttled while cfs->nr_running > 1. Try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) * add it unconditionally.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) if (cfs_rq->nr_running == 1 || cfs_bandwidth_used())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) list_add_leaf_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) if (cfs_rq->nr_running == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) check_enqueue_throttle(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) static void __clear_buddies_last(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) if (cfs_rq->last != se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) cfs_rq->last = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) static void __clear_buddies_next(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) if (cfs_rq->next != se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) cfs_rq->next = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) static void __clear_buddies_skip(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) if (cfs_rq->skip != se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) cfs_rq->skip = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) if (cfs_rq->last == se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) __clear_buddies_last(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) if (cfs_rq->next == se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) __clear_buddies_next(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) if (cfs_rq->skip == se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) __clear_buddies_skip(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) * Update run-time statistics of the 'current'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) update_curr(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) * When dequeuing a sched_entity, we must:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) * - Update loads to have both entity and cfs_rq synced with now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) * - Subtract its load from the cfs_rq->runnable_avg.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) * - Subtract its previous weight from cfs_rq->load.weight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) * - For group entity, update its weight to reflect the new share
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) * of its group cfs_rq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) update_load_avg(cfs_rq, se, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) se_update_runnable(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) update_stats_dequeue(cfs_rq, se, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) clear_buddies(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) if (se != cfs_rq->curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) __dequeue_entity(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) se->on_rq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) account_entity_dequeue(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) * Normalize after update_curr(); which will also have moved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) * min_vruntime if @se is the one holding it back. But before doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) * update_min_vruntime() again, which will discount @se's position and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) * can move min_vruntime forward still more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) if (!(flags & DEQUEUE_SLEEP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) se->vruntime -= cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) /* return excess runtime on last dequeue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) return_cfs_rq_runtime(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) update_cfs_group(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) * Now advance min_vruntime if @se was the entity holding it back,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) * except when DEQUEUE_SAVE && !DEQUEUE_MOVE; in this case we'll be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) * put back on, and if we advance min_vruntime, we'll be placed back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) * further than we started -- i.e. we'll be penalized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) if ((flags & (DEQUEUE_SAVE | DEQUEUE_MOVE)) != DEQUEUE_SAVE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) update_min_vruntime(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) * Preempt the current task with a newly woken task if needed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) unsigned long ideal_runtime, delta_exec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) struct sched_entity *se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) s64 delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) bool skip_preempt = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) ideal_runtime = sched_slice(cfs_rq, curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) delta_exec = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) trace_android_rvh_check_preempt_tick(current, &ideal_runtime, &skip_preempt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) delta_exec, cfs_rq, curr, sysctl_sched_min_granularity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) if (skip_preempt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) if (delta_exec > ideal_runtime) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) resched_curr(rq_of(cfs_rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) * The current task ran long enough; ensure it doesn't get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) * re-elected due to buddy favours.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) clear_buddies(cfs_rq, curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) * Ensure that a task that missed wakeup preemption by a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) * narrow margin doesn't have to wait for a full slice.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) * This also mitigates buddy induced latencies under load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) if (delta_exec < sysctl_sched_min_granularity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) se = __pick_first_entity(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) delta = curr->vruntime - se->vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) if (delta < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) if (delta > ideal_runtime)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) resched_curr(rq_of(cfs_rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) }
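/*
 * Example of the two resched conditions above (illustrative numbers): with
 * ideal_runtime = 4ms, a current task that has already run delta_exec = 5ms
 * is rescheduled outright; if it has only run 2ms but the leftmost entity's
 * vruntime lags curr->vruntime by more than 4ms, it is rescheduled as well,
 * provided delta_exec is at least sysctl_sched_min_granularity.
 */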
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) void set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) /* 'current' is not kept within the tree. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) if (se->on_rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) * Any task has to be enqueued before it gets to execute on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) * a CPU. So account for the time it spent waiting on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) * runqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) update_stats_wait_end(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) __dequeue_entity(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) update_load_avg(cfs_rq, se, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) update_stats_curr_start(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) cfs_rq->curr = se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) * Track our maximum slice length, if the CPU's load is at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) * least twice that of our own weight (i.e. don't track it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) * when there are only lesser-weight tasks around):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) if (schedstat_enabled() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) rq_of(cfs_rq)->cfs.load.weight >= 2*se->load.weight) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) schedstat_set(se->statistics.slice_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) max((u64)schedstat_val(se->statistics.slice_max),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) se->sum_exec_runtime - se->prev_sum_exec_runtime));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) se->prev_sum_exec_runtime = se->sum_exec_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) EXPORT_SYMBOL_GPL(set_next_entity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) * Pick the next process, keeping these things in mind, in this order:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) * 1) keep things fair between processes/task groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) * 2) pick the "next" process, since someone really wants that to run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) * 3) pick the "last" process, for cache locality
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) * 4) do not run the "skip" process, if something else is available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) static struct sched_entity *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) pick_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) struct sched_entity *left = __pick_first_entity(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) struct sched_entity *se = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) trace_android_rvh_pick_next_entity(cfs_rq, curr, &se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) if (se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) * If curr is set we have to see if it's left of the leftmost entity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) * still in the tree, provided there was anything in the tree at all.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) if (!left || (curr && entity_before(curr, left)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) left = curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) se = left; /* ideally we run the leftmost entity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) * Avoid running the skip buddy if running something else can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) * be done without getting too unfair.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) if (cfs_rq->skip == se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) struct sched_entity *second;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) if (se == curr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) second = __pick_first_entity(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) second = __pick_next_entity(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) if (!second || (curr && entity_before(curr, second)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) second = curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) if (second && wakeup_preempt_entity(second, left) < 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) se = second;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) * Someone really wants this to run. If it's not unfair, run it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) se = cfs_rq->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) } else if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) * Prefer last buddy, try to return the CPU to a preempted task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) se = cfs_rq->last;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) clear_buddies(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) return se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) * If still on the runqueue then deactivate_task()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) * was not called and update_curr() has to be done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) if (prev->on_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570) update_curr(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) /* throttle cfs_rqs exceeding runtime */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) check_cfs_rq_runtime(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) check_spread(cfs_rq, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) if (prev->on_rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) update_stats_wait_start(cfs_rq, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) /* Put 'current' back into the tree. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) __enqueue_entity(cfs_rq, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) /* in !on_rq case, update occurred at dequeue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) update_load_avg(cfs_rq, prev, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) cfs_rq->curr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) * Update run-time statistics of the 'current'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) update_curr(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) * Ensure that runnable average is periodically updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) update_load_avg(cfs_rq, curr, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) update_cfs_group(curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) #ifdef CONFIG_SCHED_HRTICK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) * queued ticks are scheduled to match the slice, so don't bother
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) * validating it and just reschedule.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) if (queued) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) resched_curr(rq_of(cfs_rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) * don't let the period tick interfere with the hrtick preemption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) if (!sched_feat(DOUBLE_TICK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) hrtimer_active(&rq_of(cfs_rq)->hrtick_timer))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) if (cfs_rq->nr_running > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) check_preempt_tick(cfs_rq, curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) /**************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) * CFS bandwidth control machinery
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) #ifdef CONFIG_CFS_BANDWIDTH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) #ifdef CONFIG_JUMP_LABEL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) static struct static_key __cfs_bandwidth_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) static inline bool cfs_bandwidth_used(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) return static_key_false(&__cfs_bandwidth_used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) void cfs_bandwidth_usage_inc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) static_key_slow_inc_cpuslocked(&__cfs_bandwidth_used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) void cfs_bandwidth_usage_dec(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) static_key_slow_dec_cpuslocked(&__cfs_bandwidth_used);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) #else /* CONFIG_JUMP_LABEL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) static bool cfs_bandwidth_used(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) void cfs_bandwidth_usage_inc(void) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) void cfs_bandwidth_usage_dec(void) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) #endif /* CONFIG_JUMP_LABEL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) * default period for cfs group bandwidth.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) * default: 0.1s, units: nanoseconds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) static inline u64 default_cfs_period(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) return 100000000ULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) static inline u64 sched_cfs_bandwidth_slice(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) }
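/*
 * Each cfs_rq pulls runtime from the global pool in slices of
 * sysctl_sched_cfs_bandwidth_slice microseconds (5ms by default), which
 * bounds how much quota can sit unused on any one CPU.
 */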
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) * Replenish runtime according to assigned quota. We use sched_clock_cpu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) * directly instead of rq->clock to avoid adding additional synchronization
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) * around rq->lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) * requires cfs_b->lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) if (cfs_b->quota != RUNTIME_INF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) cfs_b->runtime = cfs_b->quota;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) return &tg->cfs_bandwidth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) /* returns 0 on failure to allocate runtime */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) static int __assign_cfs_rq_runtime(struct cfs_bandwidth *cfs_b,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) struct cfs_rq *cfs_rq, u64 target_runtime)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) u64 min_amount, amount = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) lockdep_assert_held(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) /* note: this is a positive sum as runtime_remaining <= 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) min_amount = target_runtime - cfs_rq->runtime_remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) if (cfs_b->quota == RUNTIME_INF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) amount = min_amount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) start_cfs_bandwidth(cfs_b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) if (cfs_b->runtime > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) amount = min(cfs_b->runtime, min_amount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) cfs_b->runtime -= amount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) cfs_b->idle = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) cfs_rq->runtime_remaining += amount;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) return cfs_rq->runtime_remaining > 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) }
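
/*
 * Example of the top-up above: with target_runtime = 5ms and
 * runtime_remaining = -2ms the cfs_rq asks for 7ms.  If the global pool
 * still holds at least 7ms, runtime_remaining becomes +5ms and the
 * function returns success; if only 1ms is left, runtime_remaining ends
 * up at -1ms and the caller will go on to throttle.
 */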
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) /* returns 0 on failure to allocate runtime */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) raw_spin_lock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) ret = __assign_cfs_rq_runtime(cfs_b, cfs_rq, sched_cfs_bandwidth_slice());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) raw_spin_unlock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) /* dock delta_exec before expiring quota (as it could span periods) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) cfs_rq->runtime_remaining -= delta_exec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) if (likely(cfs_rq->runtime_remaining > 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) if (cfs_rq->throttled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) * if we're unable to extend our runtime we resched so that the active
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) * hierarchy can be throttled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) resched_curr(rq_of(cfs_rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) }
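
/*
 * Accounting flow in short: update_curr() charges delta_exec against the
 * local runtime_remaining; once it drops to zero or below we try to pull
 * a fresh slice from the global pool, and if that fails we resched so
 * that the throttle itself happens later, on the put_prev_entity() ->
 * check_cfs_rq_runtime() path.
 */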
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) static __always_inline
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) if (!cfs_bandwidth_used() || !cfs_rq->runtime_enabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) __account_cfs_rq_runtime(cfs_rq, delta_exec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) return cfs_bandwidth_used() && cfs_rq->throttled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) /* check whether cfs_rq, or any parent, is throttled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) return cfs_bandwidth_used() && cfs_rq->throttle_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) * Ensure that neither of the group entities corresponding to src_cpu or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) * dest_cpu are members of a throttled hierarchy when performing group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) * load-balance operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) static inline int throttled_lb_pair(struct task_group *tg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) int src_cpu, int dest_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) struct cfs_rq *src_cfs_rq, *dest_cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) src_cfs_rq = tg->cfs_rq[src_cpu];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) dest_cfs_rq = tg->cfs_rq[dest_cpu];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) return throttled_hierarchy(src_cfs_rq) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) throttled_hierarchy(dest_cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) static int tg_unthrottle_up(struct task_group *tg, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) struct rq *rq = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) cfs_rq->throttle_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) if (!cfs_rq->throttle_count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) cfs_rq->throttled_clock_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) /* Add a cfs_rq with already running entities back to the leaf list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) if (cfs_rq->nr_running >= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) list_add_leaf_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) static int tg_throttle_down(struct task_group *tg, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) struct rq *rq = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807) /* group is entering throttled state, stop time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) if (!cfs_rq->throttle_count) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) cfs_rq->throttled_clock_task = rq_clock_task(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) list_del_leaf_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) cfs_rq->throttle_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) }
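
/*
 * throttle_count is a per-cfs_rq nesting counter: tg_throttle_down()
 * bumps it for every descendant on this CPU, and only the 0->1
 * transition freezes throttled_clock_task and drops the cfs_rq from the
 * leaf list; tg_unthrottle_up() undoes this on the 1->0 transition.
 * This is what keeps nested throttles (parent and child both over
 * quota) consistent.
 */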
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) static bool throttle_cfs_rq(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) struct rq *rq = rq_of(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) struct sched_entity *se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) long task_delta, idle_task_delta, dequeue = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) raw_spin_lock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) /* This will start the period timer if necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) if (__assign_cfs_rq_runtime(cfs_b, cfs_rq, 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) * We have raced with bandwidth becoming available, and if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) * actually throttled the timer might not unthrottle us for an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) * entire period. We additionally need to make sure that any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831) * subsequent check_cfs_rq_runtime() calls agree not to throttle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) * us, as we may commit to doing a cfs put_prev+pick_next, so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) * ask for 1ns of runtime rather than just checking cfs_b.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) dequeue = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) list_add_tail_rcu(&cfs_rq->throttled_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) &cfs_b->throttled_cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) raw_spin_unlock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) if (!dequeue)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) return false; /* Throttle no longer required. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) se = cfs_rq->tg->se[cpu_of(rq_of(cfs_rq))];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) /* freeze hierarchy runnable averages while throttled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) walk_tg_tree_from(cfs_rq->tg, tg_throttle_down, tg_nop, (void *)rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) task_delta = cfs_rq->h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) idle_task_delta = cfs_rq->idle_h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) struct cfs_rq *qcfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) /* throttled entity or throttle-on-deactivate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) if (!se->on_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) if (dequeue) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) dequeue_entity(qcfs_rq, se, DEQUEUE_SLEEP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) update_load_avg(qcfs_rq, se, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) se_update_runnable(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) qcfs_rq->h_nr_running -= task_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) qcfs_rq->idle_h_nr_running -= idle_task_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) if (qcfs_rq->load.weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) dequeue = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) if (!se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) sub_nr_running(rq, task_delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) * Note: distribution will already see us throttled via the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) * throttled-list. rq->lock protects completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) cfs_rq->throttled = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) cfs_rq->throttled_clock = rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) }
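
/*
 * Note on the walk above: the group's se is dequeued from each ancestor
 * cfs_rq until one of them still has other load left (load.weight != 0);
 * past that point only h_nr_running/idle_h_nr_running and the load
 * averages are adjusted.  Reaching the root (se == NULL) means the whole
 * hierarchy emptied and rq->nr_running is reduced accordingly.
 */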
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) struct rq *rq = rq_of(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) struct sched_entity *se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) long task_delta, idle_task_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) se = cfs_rq->tg->se[cpu_of(rq)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) cfs_rq->throttled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) update_rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) raw_spin_lock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) list_del_rcu(&cfs_rq->throttled_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) raw_spin_unlock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) /* update hierarchical throttle state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) if (!cfs_rq->load.weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) task_delta = cfs_rq->h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) idle_task_delta = cfs_rq->idle_h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) if (se->on_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) enqueue_entity(cfs_rq, se, ENQUEUE_WAKEUP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) cfs_rq->h_nr_running += task_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) cfs_rq->idle_h_nr_running += idle_task_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) /* end evaluation on encountering a throttled cfs_rq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) goto unthrottle_throttle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) update_load_avg(cfs_rq, se, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) se_update_runnable(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) cfs_rq->h_nr_running += task_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) cfs_rq->idle_h_nr_running += idle_task_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) /* end evaluation on encountering a throttled cfs_rq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) goto unthrottle_throttle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) * One parent has been throttled and cfs_rq removed from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) * list. Add it back to not break the leaf list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) if (throttled_hierarchy(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) list_add_leaf_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) /* At this point se is NULL and we are at root level */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) add_nr_running(rq, task_delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) unthrottle_throttle:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) * The cfs_rq_throttled() breaks in the above iteration can result in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) * incomplete leaf list maintenance, resulting in triggering the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) * assertion below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) if (list_add_leaf_cfs_rq(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) assert_list_leaf_cfs_rq(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) /* Determine whether we need to wake up potentially idle CPU: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) if (rq->curr == rq->idle && rq->cfs.nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) resched_curr(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) static void distribute_cfs_runtime(struct cfs_bandwidth *cfs_b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) u64 runtime, remaining = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) list_for_each_entry_rcu(cfs_rq, &cfs_b->throttled_cfs_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) throttled_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) struct rq *rq = rq_of(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) rq_lock_irqsave(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) if (!cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) /* By the above check, this should never be true */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) SCHED_WARN_ON(cfs_rq->runtime_remaining > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) raw_spin_lock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) runtime = -cfs_rq->runtime_remaining + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) if (runtime > cfs_b->runtime)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) runtime = cfs_b->runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) cfs_b->runtime -= runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) remaining = cfs_b->runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) raw_spin_unlock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) cfs_rq->runtime_remaining += runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) /* we check whether we're throttled above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) if (cfs_rq->runtime_remaining > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) unthrottle_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) rq_unlock_irqrestore(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) if (!remaining)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) }
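
/*
 * Distribution example: a throttled cfs_rq with runtime_remaining = -2ms
 * is handed exactly 2ms + 1ns, just enough to go positive and be
 * unthrottled; the loop then moves on and stops once the global pool is
 * exhausted, leaving the rest throttled until the next period refresh.
 */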
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) * Responsible for refilling a task_group's bandwidth and unthrottling its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) * cfs_rqs as appropriate. If there has been no activity within the last
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) * period the timer is deactivated until scheduling resumes; cfs_b->idle is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) * used to track this state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun, unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) int throttled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) /* no need to continue the timer with no bandwidth constraint */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) if (cfs_b->quota == RUNTIME_INF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) goto out_deactivate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) throttled = !list_empty(&cfs_b->throttled_cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) cfs_b->nr_periods += overrun;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) * idle depends on !throttled (for the case of a large deficit), and if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) * we're going inactive then everything else can be deferred
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) if (cfs_b->idle && !throttled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) goto out_deactivate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) __refill_cfs_bandwidth_runtime(cfs_b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) if (!throttled) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) /* mark as potentially idle for the upcoming period */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) cfs_b->idle = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) /* account preceding periods in which throttling occurred */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) cfs_b->nr_throttled += overrun;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) * This check is repeated as we release cfs_b->lock while we unthrottle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) while (throttled && cfs_b->runtime > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) /* we can't nest cfs_b->lock while distributing bandwidth */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) distribute_cfs_runtime(cfs_b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) raw_spin_lock_irqsave(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) throttled = !list_empty(&cfs_b->throttled_cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) * While we are ensured activity in the period following an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) * unthrottle, this also covers the case in which the new bandwidth is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) * insufficient to cover the existing bandwidth deficit. (Forcing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) * timer to remain active while there are any throttled entities.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) cfs_b->idle = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) out_deactivate:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) /* a cfs_rq won't donate quota below this amount */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) static const u64 min_cfs_rq_runtime = 1 * NSEC_PER_MSEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) /* minimum remaining period time to redistribute slack quota */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) static const u64 min_bandwidth_expiration = 2 * NSEC_PER_MSEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) /* how long we wait to gather additional slack before distributing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) static const u64 cfs_bandwidth_slack_period = 5 * NSEC_PER_MSEC;
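
/*
 * Putting the three constants together: slack is only returned when a
 * cfs_rq would keep more than 1ms for itself, the slack timer fires 5ms
 * after slack first starts accumulating, and it is not armed at all if
 * the next period refresh is due within 5ms + 2ms = 7ms (see
 * start_cfs_slack_bandwidth() below).
 */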
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) * Are we near the end of the current quota period?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) * Requires cfs_b->lock for hrtimer_expires_remaining to be safe against the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) * hrtimer base being cleared by hrtimer_start. In the case of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) * migrate_hrtimers, base is never cleared, so we are fine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) static int runtime_refresh_within(struct cfs_bandwidth *cfs_b, u64 min_expire)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) struct hrtimer *refresh_timer = &cfs_b->period_timer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) s64 remaining;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) /* if the call-back is running a quota refresh is already occurring */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) if (hrtimer_callback_running(refresh_timer))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) /* is a quota refresh about to occur? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) remaining = ktime_to_ns(hrtimer_expires_remaining(refresh_timer));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) if (remaining < (s64)min_expire)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) static void start_cfs_slack_bandwidth(struct cfs_bandwidth *cfs_b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) u64 min_left = cfs_bandwidth_slack_period + min_bandwidth_expiration;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) /* if there's a quota refresh soon don't bother with slack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) if (runtime_refresh_within(cfs_b, min_left))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) /* don't push forwards an existing deferred unthrottle */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) if (cfs_b->slack_started)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) cfs_b->slack_started = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) hrtimer_start(&cfs_b->slack_timer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) ns_to_ktime(cfs_bandwidth_slack_period),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) HRTIMER_MODE_REL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) /* we know any runtime found here is valid as update_curr() precedes return */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) static void __return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) s64 slack_runtime = cfs_rq->runtime_remaining - min_cfs_rq_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) if (slack_runtime <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) raw_spin_lock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) if (cfs_b->quota != RUNTIME_INF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) cfs_b->runtime += slack_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) /* we are under rq->lock, defer unthrottling using a timer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) if (cfs_b->runtime > sched_cfs_bandwidth_slice() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) !list_empty(&cfs_b->throttled_cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) start_cfs_slack_bandwidth(cfs_b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) raw_spin_unlock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) /* even if it's not valid for return we don't want to try again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) cfs_rq->runtime_remaining -= slack_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) }
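
/*
 * Example: a cfs_rq going idle with runtime_remaining = 3ms returns
 * 3ms - 1ms = 2ms of slack to the global pool and keeps the minimum 1ms
 * for itself; the local runtime_remaining is reduced even when quota is
 * RUNTIME_INF and the global pool was left untouched.
 */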
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) if (!cfs_bandwidth_used())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) if (!cfs_rq->runtime_enabled || cfs_rq->nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) __return_cfs_rq_runtime(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) * This is done with a timer (instead of inline with bandwidth return) since
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) * it's necessary to juggle rq->locks to unthrottle their respective cfs_rqs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) u64 runtime = 0, slice = sched_cfs_bandwidth_slice();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) /* confirm we're still not at a refresh boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) raw_spin_lock_irqsave(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) cfs_b->slack_started = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) if (runtime_refresh_within(cfs_b, min_bandwidth_expiration)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) if (cfs_b->quota != RUNTIME_INF && cfs_b->runtime > slice)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) runtime = cfs_b->runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) if (!runtime)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) distribute_cfs_runtime(cfs_b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) raw_spin_lock_irqsave(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) * When a group wakes up we want to make sure that its quota is not already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) * expired/exceeded, otherwise it may be allowed to steal additional ticks of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) * runtime as update_curr() throttling can not trigger until it's on-rq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) if (!cfs_bandwidth_used())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) /* an active group must be handled by the update_curr()->put() path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) if (!cfs_rq->runtime_enabled || cfs_rq->curr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) /* ensure the group is not already throttled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207) /* update runtime allocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) account_cfs_rq_runtime(cfs_rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) if (cfs_rq->runtime_remaining <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) throttle_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) static void sync_throttle(struct task_group *tg, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) struct cfs_rq *pcfs_rq, *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) if (!cfs_bandwidth_used())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) if (!tg->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) cfs_rq = tg->cfs_rq[cpu];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) pcfs_rq = tg->parent->cfs_rq[cpu];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) cfs_rq->throttle_count = pcfs_rq->throttle_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) cfs_rq->throttled_clock_task = rq_clock_task(cpu_rq(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) /* conditionally throttle active cfs_rq's from put_prev_entity() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233) if (!cfs_bandwidth_used())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) if (likely(!cfs_rq->runtime_enabled || cfs_rq->runtime_remaining > 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240) * it's possible for a throttled entity to be forced into a running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241) * state (e.g. set_curr_task); in this case we're finished.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) return throttle_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) static enum hrtimer_restart sched_cfs_slack_timer(struct hrtimer *timer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) struct cfs_bandwidth *cfs_b =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252) container_of(timer, struct cfs_bandwidth, slack_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) do_sched_cfs_slack_timer(cfs_b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256) return HRTIMER_NORESTART;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) extern const u64 max_cfs_quota_period;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) static enum hrtimer_restart sched_cfs_period_timer(struct hrtimer *timer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) struct cfs_bandwidth *cfs_b =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) container_of(timer, struct cfs_bandwidth, period_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) int overrun;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) int idle = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) int count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) raw_spin_lock_irqsave(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) overrun = hrtimer_forward_now(timer, cfs_b->period);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) if (!overrun)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) idle = do_sched_cfs_period_timer(cfs_b, overrun, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) if (++count > 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) u64 new, old = ktime_to_ns(cfs_b->period);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) * Grow period by a factor of 2 to avoid losing precision.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) * Precision loss in the quota/period ratio can cause __cfs_schedulable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) * to fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) new = old * 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) if (new < max_cfs_quota_period) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288) cfs_b->period = ns_to_ktime(new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) cfs_b->quota *= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291) pr_warn_ratelimited(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) "cfs_period_timer[cpu%d]: period too short, scaling up (new cfs_period_us = %lld, cfs_quota_us = %lld)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293) smp_processor_id(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294) div_u64(new, NSEC_PER_USEC),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) div_u64(cfs_b->quota, NSEC_PER_USEC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) pr_warn_ratelimited(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) "cfs_period_timer[cpu%d]: period too short, but cannot scale up without losing precision (cfs_period_us = %lld, cfs_quota_us = %lld)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) smp_processor_id(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300) div_u64(old, NSEC_PER_USEC),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) div_u64(cfs_b->quota, NSEC_PER_USEC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304) /* reset count so we don't come right back in here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) if (idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) cfs_b->period_active = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310) raw_spin_unlock_irqrestore(&cfs_b->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) return idle ? HRTIMER_NORESTART : HRTIMER_RESTART;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) }
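
/*
 * Example of the period scaling above (values are illustrative): with a
 * 500us period that keeps overrunning, more than three catch-up
 * iterations in one timer activation cause both period and quota to be
 * doubled (e.g. to a 1000us period), preserving the quota/period ratio
 * while halving the timer rate.  Scaling only happens while the doubled
 * period stays below max_cfs_quota_period.
 */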
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) raw_spin_lock_init(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318) cfs_b->runtime = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319) cfs_b->quota = RUNTIME_INF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320) cfs_b->period = ns_to_ktime(default_cfs_period());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) INIT_LIST_HEAD(&cfs_b->throttled_cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) hrtimer_init(&cfs_b->period_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324) cfs_b->period_timer.function = sched_cfs_period_timer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) hrtimer_init(&cfs_b->slack_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326) cfs_b->slack_timer.function = sched_cfs_slack_timer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) cfs_b->slack_started = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332) cfs_rq->runtime_enabled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) INIT_LIST_HEAD(&cfs_rq->throttled_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) lockdep_assert_held(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) if (cfs_b->period_active)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) cfs_b->period_active = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348) static void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) /* init_cfs_bandwidth() was not called */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) if (!cfs_b->throttled_cfs_rq.next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) hrtimer_cancel(&cfs_b->period_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355) hrtimer_cancel(&cfs_b->slack_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) * Both these CPU hotplug callbacks race against unregister_fair_sched_group()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) * The race is harmless, since modifying the bandwidth settings of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362) * already-unhooked group doesn't do much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) /* cpu online callback */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366) static void __maybe_unused update_runtime_enabled(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) struct task_group *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) lockdep_assert_held(&rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) list_for_each_entry_rcu(tg, &task_groups, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374) struct cfs_bandwidth *cfs_b = &tg->cfs_bandwidth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) raw_spin_lock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) cfs_rq->runtime_enabled = cfs_b->quota != RUNTIME_INF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379) raw_spin_unlock(&cfs_b->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) /* cpu offline callback */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) static void __maybe_unused unthrottle_offline_cfs_rqs(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) struct task_group *tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) lockdep_assert_held(&rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) list_for_each_entry_rcu(tg, &task_groups, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) struct cfs_rq *cfs_rq = tg->cfs_rq[cpu_of(rq)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) if (!cfs_rq->runtime_enabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) * clock_task is not advancing so we just need to make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) * there's some valid quota amount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) cfs_rq->runtime_remaining = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404) * Offline rq is schedulable till CPU is completely disabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) * in take_cpu_down(), so we prevent new cfs throttling here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) cfs_rq->runtime_enabled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410) unthrottle_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) #else /* CONFIG_CFS_BANDWIDTH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) static inline bool cfs_bandwidth_used(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422) static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) static inline void sync_throttle(struct task_group *tg, int cpu) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) static __always_inline void return_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) static inline int cfs_rq_throttled(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) static inline int throttled_hierarchy(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) static inline int throttled_lb_pair(struct task_group *tg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) int src_cpu, int dest_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) void init_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) static inline void destroy_cfs_bandwidth(struct cfs_bandwidth *cfs_b) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) static inline void update_runtime_enabled(struct rq *rq) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) static inline void unthrottle_offline_cfs_rqs(struct rq *rq) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) #endif /* CONFIG_CFS_BANDWIDTH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) /**************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461) * CFS operations on tasks:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464) #ifdef CONFIG_SCHED_HRTICK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465) static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) SCHED_WARN_ON(task_rq(p) != rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) if (rq->cfs.h_nr_running > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) u64 slice = sched_slice(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474) u64 ran = se->sum_exec_runtime - se->prev_sum_exec_runtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) s64 delta = slice - ran;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477) if (delta < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) if (rq->curr == p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) resched_curr(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) hrtick_start(rq, delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484) }
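
/*
 * hrtick example (illustrative numbers): if sched_slice() gives the task
 * a 3ms slice and it has already run 1ms since it was last picked, a
 * high-resolution timer is armed 2ms out so preemption happens right at
 * the slice boundary instead of waiting for the next regular tick; a
 * negative delta means the slice is already used up and we resched now.
 */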
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) * called from enqueue/dequeue and updates the hrtick when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) * current task is from our class and nr_running is low enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) * to matter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) static void hrtick_update(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) struct task_struct *curr = rq->curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) if (!hrtick_enabled(rq) || curr->sched_class != &fair_sched_class)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) if (cfs_rq_of(&curr->se)->nr_running < sched_nr_latency)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) hrtick_start_fair(rq, curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) #else /* !CONFIG_SCHED_HRTICK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503) hrtick_start_fair(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) static inline void hrtick_update(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) static inline unsigned long cpu_util(int cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) static inline bool cpu_overutilized(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) int overutilized = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) trace_android_rvh_cpu_overutilized(cpu, &overutilized);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) if (overutilized != -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) return overutilized;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) return !fits_capacity(cpu_util(cpu), capacity_of(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) static inline void update_overutilized_status(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534) static inline void update_overutilized_status(struct rq *rq) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) /* Runqueue only has SCHED_IDLE tasks enqueued */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) static int sched_idle_rq(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) return unlikely(rq->nr_running == rq->cfs.idle_h_nr_running &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) rq->nr_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542) }
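/*
 * For example, a runqueue with three runnable tasks that are all
 * SCHED_IDLE satisfies nr_running == idle_h_nr_running and counts as a
 * sched-idle rq; an empty runqueue does not, since the second condition
 * requires nr_running to be non-zero.
 */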
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) static int sched_idle_cpu(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) return sched_idle_rq(cpu_rq(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) * The enqueue_task method is called before nr_running is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) * increased. Here we update the fair scheduling stats and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) * then put the task into the rbtree:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561) int idle_h_nr_running = task_has_idle_policy(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) int task_new = !(flags & ENQUEUE_WAKEUP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563) int should_iowait_boost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566) * The code below (indirectly) updates schedutil which looks at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567) * the cfs_rq utilization to select a frequency.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) * Let's add the task's estimated utilization to the cfs_rq's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569) * estimated utilization, before we update schedutil.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) util_est_enqueue(&rq->cfs, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) * If in_iowait is set, the code below may not trigger any cpufreq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) * utilization updates, so do it here explicitly with the IOWAIT flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) * passed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) should_iowait_boost = p->in_iowait;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) trace_android_rvh_set_iowait(p, &should_iowait_boost);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) if (should_iowait_boost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) cpufreq_update_util(rq, SCHED_CPUFREQ_IOWAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) if (se->on_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) enqueue_entity(cfs_rq, se, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) cfs_rq->h_nr_running++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) cfs_rq->idle_h_nr_running += idle_h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) /* end evaluation on encountering a throttled cfs_rq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) goto enqueue_throttle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) flags = ENQUEUE_WAKEUP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) trace_android_rvh_enqueue_task_fair(rq, p, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) update_load_avg(cfs_rq, se, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) se_update_runnable(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) update_cfs_group(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) cfs_rq->h_nr_running++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) cfs_rq->idle_h_nr_running += idle_h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610) /* end evaluation on encountering a throttled cfs_rq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) goto enqueue_throttle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) * One parent has been throttled and cfs_rq removed from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) * list. Add it back to not break the leaf list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618) if (throttled_hierarchy(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) list_add_leaf_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) /* At this point se is NULL and we are at root level */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623) add_nr_running(rq, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) * Since new tasks are assigned an initial util_avg equal to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627) * half of the spare capacity of their CPU, tiny tasks have the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) * ability to cross the overutilized threshold, which will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629) * result in the load balancer ruining all the task placement
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) * done by EAS. As a way to mitigate that effect, do not account
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631) * for the first enqueue operation of new tasks during the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) * overutilized flag detection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634) * A better way of solving this problem would be to wait for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) * the PELT signals of tasks to converge before taking them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) * into account, but that is not straightforward to implement,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) * and the following generally works well enough in practice.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) if (!task_new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) update_overutilized_status(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) enqueue_throttle:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) if (cfs_bandwidth_used()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) * When bandwidth control is enabled, the cfs_rq_throttled()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) * breaks in the iterations above can leave the leaf cfs_rq list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647) * incompletely maintained, which would trigger the assertion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) * below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) if (list_add_leaf_cfs_rq(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) assert_list_leaf_cfs_rq(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) hrtick_update(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) }
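/*
 * Note on the two enqueue loops above: the first walks the hierarchy
 * bottom-up and stops at the first ancestor entity that is already
 * on_rq (or at a throttled cfs_rq); the second continues from that
 * point and only refreshes PELT, the group shares and the hierarchical
 * h_nr_running counts of the ancestors that were queued already.
 */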
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) static void set_next_buddy(struct sched_entity *se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666) * The dequeue_task method is called before nr_running is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) * decreased. We remove the task from the rbtree and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) * update the fair scheduling stats:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670) static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674) int task_sleep = flags & DEQUEUE_SLEEP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) int idle_h_nr_running = task_has_idle_policy(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) bool was_sched_idle = sched_idle_rq(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) util_est_dequeue(&rq->cfs, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) dequeue_entity(cfs_rq, se, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) cfs_rq->h_nr_running--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685) cfs_rq->idle_h_nr_running -= idle_h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687) /* end evaluation on encountering a throttled cfs_rq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689) goto dequeue_throttle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) /* Don't dequeue parent if it has other entities besides us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692) if (cfs_rq->load.weight) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) /* Avoid re-evaluating load for this entity: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) se = parent_entity(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) * Bias pick_next to pick a task from this cfs_rq, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) * p is sleeping when it is within its sched_slice.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699) if (task_sleep && se && !throttled_hierarchy(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) set_next_buddy(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703) flags |= DEQUEUE_SLEEP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706) trace_android_rvh_dequeue_task_fair(rq, p, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710) update_load_avg(cfs_rq, se, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711) se_update_runnable(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712) update_cfs_group(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714) cfs_rq->h_nr_running--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) cfs_rq->idle_h_nr_running -= idle_h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) /* end evaluation on encountering a throttled cfs_rq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) if (cfs_rq_throttled(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719) goto dequeue_throttle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) /* At this point se is NULL and we are at root level */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724) sub_nr_running(rq, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) /* balance early to pull high priority tasks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) if (unlikely(!was_sched_idle && sched_idle_rq(rq)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) rq->next_balance = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) dequeue_throttle:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731) util_est_update(&rq->cfs, p, task_sleep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) hrtick_update(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) /* Working cpumask for: load_balance, load_balance_newidle. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) DEFINE_PER_CPU(cpumask_var_t, load_balance_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) DEFINE_PER_CPU(cpumask_var_t, select_idle_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) #ifdef CONFIG_NO_HZ_COMMON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) static struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) cpumask_var_t idle_cpus_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745) atomic_t nr_cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746) int has_blocked; /* Idle CPUs have blocked load */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) unsigned long next_balance; /* in jiffy units */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748) unsigned long next_blocked; /* Next update of blocked load in jiffies */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) } nohz ____cacheline_aligned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) #endif /* CONFIG_NO_HZ_COMMON */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753) static unsigned long cpu_load(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) return cfs_rq_load_avg(&rq->cfs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759) * cpu_load_without - compute CPU load without any contributions from *p
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760) * @rq: the runqueue whose CPU load is requested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) * @p: the task whose load should be discounted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) * The load of a CPU is defined by the load of tasks currently enqueued on that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) * CPU as well as tasks which are currently sleeping after an execution on that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765) * CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) * This method returns the load of the specified CPU by discounting the load of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) * the specified task, whenever the task is currently contributing to the CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) * load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771) static unsigned long cpu_load_without(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) unsigned int load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) /* Task has no contribution or is new */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) if (cpu_of(rq) != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) return cpu_load(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) cfs_rq = &rq->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) load = READ_ONCE(cfs_rq->avg.load_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) /* Discount task's load from CPU's load */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) lsub_positive(&load, task_h_load(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786) return load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) }
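/*
 * Example (illustrative numbers only): with cfs_rq->avg.load_avg == 800
 * and task_h_load(p) == 300, the prospective destination sees a load of
 * 500 on this CPU.  lsub_positive() clamps the subtraction at zero, so
 * a stale, oversized task contribution can never make the CPU appear
 * negatively loaded.
 */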
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) static unsigned long cpu_runnable(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) return cfs_rq_runnable_avg(&rq->cfs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794) static unsigned long cpu_runnable_without(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) unsigned int runnable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) /* Task has no contribution or is new */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) if (cpu_of(rq) != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801) return cpu_runnable(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) cfs_rq = &rq->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804) runnable = READ_ONCE(cfs_rq->avg.runnable_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806) /* Discount task's runnable from CPU's runnable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807) lsub_positive(&runnable, p->se.avg.runnable_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809) return runnable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) static unsigned long capacity_of(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814) return cpu_rq(cpu)->cpu_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) static void record_wakee(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) * Only decay a single time; tasks that have less than 1 wakeup per
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821) * jiffy will not have built up many flips.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) if (time_after(jiffies, current->wakee_flip_decay_ts + HZ)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824) current->wakee_flips >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) current->wakee_flip_decay_ts = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) if (current->last_wakee != p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) current->last_wakee = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830) current->wakee_flips++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835) * Detect M:N waker/wakee relationships via a switching-frequency heuristic.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837) * A waker of many should wake a different task than the one last awakened
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838) * at a frequency roughly N times higher than one of its wakees.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) * In order to determine whether we should let the load spread vs. consolidate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841) * it on shared cache, we look for a minimum 'flip' frequency of llc_size in one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842) * partner, and a factor of llc_size higher frequency in the other.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) * With both conditions met, we can be relatively sure that the relationship is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) * non-monogamous, with partner count exceeding socket size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) * Whether waker/wakee are client/server, worker/dispatcher, interrupt source or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) * whatever is irrelevant; the spread criterion is simply that the apparent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849) * partner count exceeds the socket size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) static int wake_wide(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) unsigned int master = current->wakee_flips;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) unsigned int slave = p->wakee_flips;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) int factor = __this_cpu_read(sd_llc_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) if (master < slave)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) swap(master, slave);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859) if (slave < factor || master < slave * factor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) }
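/*
 * Example (illustrative numbers only): with an LLC of 8 CPUs, a waker
 * with 100 recorded flips waking a task with 10 flips satisfies both
 * slave >= factor (10 >= 8) and master >= slave * factor (100 >= 80),
 * so the wakeup is spread (return 1).  Had the wakee only 4 flips, the
 * pair would be treated as effectively 1:1 and kept cache-affine
 * (return 0).
 */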
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) * The purpose of wake_affine() is to quickly determine on which CPU we can run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) * soonest. For the purpose of speed we only consider the waking and previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) * CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869) * wake_affine_idle() - only considers 'now', it checks whether the waking CPU is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) * cache-affine and is (or will be) idle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) * wake_affine_weight() - considers the weight to reflect the average
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) * scheduling latency of the CPUs. This seems to work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) * for the overloaded case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877) wake_affine_idle(int this_cpu, int prev_cpu, int sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880) * If this_cpu is idle, it implies the wakeup is from interrupt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) * context. Only allow the move if cache is shared. Otherwise an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) * interrupt intensive workload could force all tasks onto one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) * node depending on the IO topology or IRQ affinity settings.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) * If the prev_cpu is idle and cache affine then avoid a migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) * There is no guarantee that the cache hot data from an interrupt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) * is more important than cache hot data on the prev_cpu and from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) * a cpufreq perspective, it's better to have higher utilisation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) * on one CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) if (available_idle_cpu(this_cpu) && cpus_share_cache(this_cpu, prev_cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) return available_idle_cpu(prev_cpu) ? prev_cpu : this_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) if (sync && cpu_rq(this_cpu)->nr_running == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) return this_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) return nr_cpumask_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) wake_affine_weight(struct sched_domain *sd, struct task_struct *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902) int this_cpu, int prev_cpu, int sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) s64 this_eff_load, prev_eff_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905) unsigned long task_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907) this_eff_load = cpu_load(cpu_rq(this_cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) if (sync) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) unsigned long current_load = task_h_load(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) if (current_load > this_eff_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) return this_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) this_eff_load -= current_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) task_load = task_h_load(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) this_eff_load += task_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) if (sched_feat(WA_BIAS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) this_eff_load *= 100;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) this_eff_load *= capacity_of(prev_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) prev_eff_load = cpu_load(cpu_rq(prev_cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) prev_eff_load -= task_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927) if (sched_feat(WA_BIAS))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) prev_eff_load *= 100 + (sd->imbalance_pct - 100) / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929) prev_eff_load *= capacity_of(this_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932) * If sync, adjust the weight of prev_eff_load such that when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) * prev_eff == this_eff, select_idle_sibling() will consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) * stacking the wakee on top of the waker if no other CPU is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935) * idle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) if (sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) prev_eff_load += 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940) return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) }
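/*
 * The capacity cross-multiplication above compares
 * this_load / capacity(this_cpu) against prev_load / capacity(prev_cpu)
 * without doing a division: this_eff_load is scaled by
 * capacity_of(prev_cpu) and prev_eff_load by capacity_of(this_cpu).
 * Example (illustrative numbers, ignoring the WA_BIAS and task-load
 * adjustments): a load of 400 on a capacity-1024 waker vs. 300 on a
 * capacity-512 prev_cpu gives 400 * 512 = 204800 < 300 * 1024 = 307200,
 * so the waking CPU is preferred despite its higher absolute load.
 */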
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) static int wake_affine(struct sched_domain *sd, struct task_struct *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) int this_cpu, int prev_cpu, int sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946) int target = nr_cpumask_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948) if (sched_feat(WA_IDLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) target = wake_affine_idle(this_cpu, prev_cpu, sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) if (sched_feat(WA_WEIGHT) && target == nr_cpumask_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) target = wake_affine_weight(sd, p, this_cpu, prev_cpu, sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954) schedstat_inc(p->se.statistics.nr_wakeups_affine_attempts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) if (target == nr_cpumask_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) return prev_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) schedstat_inc(sd->ttwu_move_affine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) schedstat_inc(p->se.statistics.nr_wakeups_affine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) return target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) static struct sched_group *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) * find_idlest_group_cpu - find the idlest CPU among the CPUs in the group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970) find_idlest_group_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) unsigned long load, min_load = ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) unsigned int min_exit_latency = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) u64 latest_idle_timestamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) int least_loaded_cpu = this_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976) int shallowest_idle_cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) /* Check if we have any choice: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) if (group->group_weight == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) return cpumask_first(sched_group_span(group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983) /* Traverse only the allowed CPUs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) for_each_cpu_and(i, sched_group_span(group), p->cpus_ptr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985) if (sched_idle_cpu(i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988) if (available_idle_cpu(i)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989) struct rq *rq = cpu_rq(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990) struct cpuidle_state *idle = idle_get_state(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) if (idle && idle->exit_latency < min_exit_latency) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993) * We give priority to a CPU whose idle state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) * has the smallest exit latency irrespective
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) * of any idle timestamp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) min_exit_latency = idle->exit_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998) latest_idle_timestamp = rq->idle_stamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999) shallowest_idle_cpu = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) } else if ((!idle || idle->exit_latency == min_exit_latency) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001) rq->idle_stamp > latest_idle_timestamp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6003) * If equal or no active idle state, then
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004) * the most recently idled CPU might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) * a warmer cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007) latest_idle_timestamp = rq->idle_stamp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008) shallowest_idle_cpu = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) } else if (shallowest_idle_cpu == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) load = cpu_load(cpu_rq(i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) if (load < min_load) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013) min_load = load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014) least_loaded_cpu = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019) return shallowest_idle_cpu != -1 ? shallowest_idle_cpu : least_loaded_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) static inline int find_idlest_cpu(struct sched_domain *sd, struct task_struct *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023) int cpu, int prev_cpu, int sd_flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) int new_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027) if (!cpumask_intersects(sched_domain_span(sd), p->cpus_ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) return prev_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) * We need the task's util for cpu_util_without(); sync it up to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) * prev_cpu's last_update_time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034) if (!(sd_flag & SD_BALANCE_FORK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) sync_entity_load_avg(&p->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037) while (sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038) struct sched_group *group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) struct sched_domain *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040) int weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042) if (!(sd->flags & sd_flag)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) sd = sd->child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) group = find_idlest_group(sd, p, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) if (!group) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049) sd = sd->child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053) new_cpu = find_idlest_group_cpu(group, p, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) if (new_cpu == cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) /* Now try balancing at a lower domain level of 'cpu': */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056) sd = sd->child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) /* Now try balancing at a lower domain level of 'new_cpu': */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) cpu = new_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) weight = sd->span_weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063) sd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) for_each_domain(cpu, tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) if (weight <= tmp->span_weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067) if (tmp->flags & sd_flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) sd = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) return new_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075) #ifdef CONFIG_SCHED_SMT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076) DEFINE_STATIC_KEY_FALSE(sched_smt_present);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) EXPORT_SYMBOL_GPL(sched_smt_present);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) static inline void set_idle_cores(int cpu, int val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) struct sched_domain_shared *sds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083) sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084) if (sds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) WRITE_ONCE(sds->has_idle_cores, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088) static inline bool test_idle_cores(int cpu, bool def)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090) struct sched_domain_shared *sds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092) sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) if (sds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) return READ_ONCE(sds->has_idle_cores);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096) return def;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100) * Scans the local SMT mask to see if the entire core is idle, and records this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101) * information in sd_llc_shared->has_idle_cores.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) * Since SMT siblings share all cache levels, inspecting this limited remote
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) * state should be fairly cheap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) void __update_idle_core(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108) int core = cpu_of(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) if (test_idle_cores(core, true))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115) for_each_cpu(cpu, cpu_smt_mask(core)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116) if (cpu == core)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) if (!available_idle_cpu(cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) set_idle_cores(core, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) * Scan the entire LLC domain for idle cores; this dynamically switches off if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130) * there are no idle cores left in the system; tracked through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131) * sd_llc->shared->has_idle_cores and enabled through update_idle_core() above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133) static int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136) int core, cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138) if (!static_branch_likely(&sched_smt_present))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141) if (!test_idle_cores(target, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144) cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146) for_each_cpu_wrap(core, cpus, target) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147) bool idle = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149) for_each_cpu(cpu, cpu_smt_mask(core)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150) if (!available_idle_cpu(cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) idle = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) cpumask_andnot(cpus, cpus, cpu_smt_mask(core));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) if (idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158) return core;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) * Failed to find an idle core; stop looking for one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) set_idle_cores(target, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) }
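/*
 * Note that the loop above clears a whole SMT mask from 'cpus' per
 * iteration (cpumask_andnot()), so every core is inspected at most once
 * even though the outer walk is per-CPU.
 */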
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) * Scan the local SMT mask for idle CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172) static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) if (!static_branch_likely(&sched_smt_present))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179) for_each_cpu(cpu, cpu_smt_mask(target)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180) if (!cpumask_test_cpu(cpu, p->cpus_ptr) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181) !cpumask_test_cpu(cpu, sched_domain_span(sd)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183) if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190) #else /* CONFIG_SCHED_SMT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) static inline int select_idle_core(struct task_struct *p, struct sched_domain *sd, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197) static inline int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202) #endif /* CONFIG_SCHED_SMT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205) * Scan the LLC domain for idle CPUs; this is dynamically regulated by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206) * comparing the average scan cost (tracked in sd->avg_scan_cost) against the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) * average idle time for this rq (as found in rq->avg_idle).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) struct cpumask *cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212) struct sched_domain *this_sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213) u64 avg_cost, avg_idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) u64 time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) int this = smp_processor_id();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) int cpu, nr = INT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218) this_sd = rcu_dereference(*this_cpu_ptr(&sd_llc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219) if (!this_sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223) * Due to large variance we need a large fuzz factor; hackbench in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224) * particular is sensitive here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226) avg_idle = this_rq()->avg_idle / 512;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227) avg_cost = this_sd->avg_scan_cost + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229) if (sched_feat(SIS_AVG_CPU) && avg_idle < avg_cost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232) if (sched_feat(SIS_PROP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) u64 span_avg = sd->span_weight * avg_idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) if (span_avg > 4*avg_cost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235) nr = div_u64(span_avg, avg_cost);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237) nr = 4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) time = cpu_clock(this);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242) cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244) for_each_cpu_wrap(cpu, cpus, target) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) if (!--nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247) if (available_idle_cpu(cpu) || sched_idle_cpu(cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251) time = cpu_clock(this) - time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) update_avg(&this_sd->avg_scan_cost, time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255) }
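/*
 * Example of the SIS_PROP scan budget (illustrative numbers only): with
 * rq->avg_idle of 2ms the fuzzed avg_idle is ~3900ns; with an
 * avg_scan_cost of ~1000ns per CPU and a 16-CPU LLC, span_avg is
 * ~62000ns, well above 4 * avg_cost, so up to span_avg / avg_cost ~= 62
 * CPUs may be scanned - i.e. the whole LLC.  On a busier rq with
 * avg_idle around 400us, the budget drops to roughly 12 CPUs.
 */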
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) * Scan the asym_capacity domain for idle CPUs; pick the first idle one on which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259) * the task fits. If no CPU is big enough, but there are idle ones, try to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) * maximize capacity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) select_idle_capacity(struct task_struct *p, struct sched_domain *sd, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265) unsigned long task_util, best_cap = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266) int cpu, best_cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) struct cpumask *cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) cpus = this_cpu_cpumask_var_ptr(select_idle_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270) cpumask_and(cpus, sched_domain_span(sd), p->cpus_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) task_util = uclamp_task_util(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274) for_each_cpu_wrap(cpu, cpus, target) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275) unsigned long cpu_cap = capacity_of(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) if (!available_idle_cpu(cpu) && !sched_idle_cpu(cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279) if (fits_capacity(task_util, cpu_cap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) if (cpu_cap > best_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283) best_cap = cpu_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) best_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288) return best_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) }
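/*
 * Example (illustrative capacities only): a task with a clamped util of
 * 300 scanning {little: 256, big: 1024} CPUs: an idle little CPU fails
 * fits_capacity() and is only remembered via best_cap, while the first
 * idle big CPU fits and is returned immediately; if nothing is big
 * enough, the largest idle CPU seen (if any) is used instead.
 */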
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291) static inline bool asym_fits_capacity(int task_util, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293) if (static_branch_unlikely(&sched_asym_cpucapacity))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294) return fits_capacity(task_util, capacity_of(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300) * Try and locate an idle core/thread in the LLC cache domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) static int select_idle_sibling(struct task_struct *p, int prev, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305) unsigned long task_util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) int i, recent_used_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) * On asymmetric systems, update the task utilization because we will check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310) * that the task fits the CPU's capacity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312) if (static_branch_unlikely(&sched_asym_cpucapacity)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313) sync_entity_load_avg(&p->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314) task_util = uclamp_task_util(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) if ((available_idle_cpu(target) || sched_idle_cpu(target)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318) asym_fits_capacity(task_util, target))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319) return target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) * If the previous CPU is cache affine and idle, don't be stupid:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324) if (prev != target && cpus_share_cache(prev, target) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) (available_idle_cpu(prev) || sched_idle_cpu(prev)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326) asym_fits_capacity(task_util, prev))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327) return prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) * Allow a per-cpu kthread to stack with the wakee if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) * kworker thread's CPU and the task's previous CPU are the same.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332) * The assumption is that the wakee queued work for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) * per-cpu kthread that is now complete, and the wakeup is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) * essentially a sync wakeup. An obvious example of this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335) * pattern is IO completions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337) if (is_per_cpu_kthread(current) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338) in_task() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) prev == smp_processor_id() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340) this_rq()->nr_running <= 1 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) asym_fits_capacity(task_util, prev)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342) return prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) /* Check a recently used CPU as a potential idle candidate: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) recent_used_cpu = p->recent_used_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347) if (recent_used_cpu != prev &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) recent_used_cpu != target &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349) cpus_share_cache(recent_used_cpu, target) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352) asym_fits_capacity(task_util, recent_used_cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354) * Replace recent_used_cpu with prev as it is a potential
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355) * candidate for the next wake:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) p->recent_used_cpu = prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) return recent_used_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361) if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) if (rockchip_perf_get_level() == ROCKCHIP_PERFORMANCE_HIGH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) goto sd_llc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367) * For asymmetric CPU capacity systems, our domain of interest is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368) * sd_asym_cpucapacity rather than sd_llc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370) if (static_branch_unlikely(&sched_asym_cpucapacity)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371) sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, target));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373) * On an asymmetric CPU capacity system where an exclusive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) * cpuset defines a symmetric island (i.e. one unique
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375) * capacity_orig value through the cpuset), the key will be set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376) * but the CPUs within that cpuset will not have a domain with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) * SD_ASYM_CPUCAPACITY. These should follow the usual symmetric
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378) * capacity path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) if (sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) i = select_idle_capacity(p, sd, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382) return ((unsigned)i < nr_cpumask_bits) ? i : target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) sd_llc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) sd = rcu_dereference(per_cpu(sd_llc, target));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388) if (!sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) return target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) i = select_idle_core(p, sd, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392) if ((unsigned)i < nr_cpumask_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) i = select_idle_cpu(p, sd, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396) if ((unsigned)i < nr_cpumask_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399) i = select_idle_smt(p, sd, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400) if ((unsigned)i < nr_cpumask_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403) return target;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407) * cpu_util - Amount of capacity of a CPU that is (estimated to be) used by CFS tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408) * @cpu: the CPU to get the utilization of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) * The unit of the return value must be the one of capacity so we can compare
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411) * the utilization with the capacity of the CPU that is available for CFS task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412) * (ie cpu_capacity).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) * cfs_rq.avg.util_avg is the sum of running time of runnable tasks plus the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415) * recent utilization of currently non-runnable tasks on a CPU. It represents
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) * the amount of utilization of a CPU in the range [0..capacity_orig] where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417) * capacity_orig is the cpu_capacity available at the highest frequency
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418) * (arch_scale_freq_capacity()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419) * The utilization of a CPU converges towards a sum equal to or less than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) * current capacity (capacity_curr <= capacity_orig) of the CPU because it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) * the running time on this CPU scaled by capacity_curr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423) * The estimated utilization of a CPU is defined to be the maximum between its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424) * cfs_rq.avg.util_avg and the sum of the estimated utilization of the tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425) * currently RUNNABLE on that CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) * This allows us to properly represent the expected utilization of a CPU which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) * has just started running a big task after a long sleep period. At the same time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) * however it preserves the benefits of the "blocked utilization" in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429) * describing the potential for other tasks waking up on the same CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) * Nevertheless, cfs_rq.avg.util_avg can be higher than capacity_curr or even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432) * higher than capacity_orig because of unfortunate rounding in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) * cfs.avg.util_avg or just after migrating tasks and new task wakeups until
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) * the average stabilizes with the new running time. We need to check that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435) * utilization stays within the range of [0..capacity_orig] and cap it if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436) * necessary. Without utilization capping, a group could be seen as overloaded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437) * (CPU0 utilization at 121% + CPU1 utilization at 80%) whereas CPU1 has 20% of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438) * available capacity. We allow utilization to overshoot capacity_curr (but not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439) * capacity_orig) as it is useful for predicting the capacity required after task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440) * migrations (scheduler-driven DVFS).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442) * Return: the (estimated) utilization for the specified CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444) static inline unsigned long cpu_util(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447) unsigned int util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) cfs_rq = &cpu_rq(cpu)->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) util = READ_ONCE(cfs_rq->avg.util_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) if (sched_feat(UTIL_EST))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453) util = max(util, READ_ONCE(cfs_rq->avg.util_est.enqueued));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) return min_t(unsigned long, util, capacity_orig_of(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456) }
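/*
 * Illustrative example (hypothetical numbers, assuming UTIL_EST is enabled):
 * with cfs_rq->avg.util_avg == 300, util_est.enqueued == 450 and
 * capacity_orig_of(cpu) == 1024, cpu_util() returns max(300, 450) = 450.
 * If util_avg transiently reads 1100 right after a migration, the result is
 * clamped to capacity_orig, i.e. 1024.
 */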
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459) * cpu_util_without: compute cpu utilization without any contributions from *p
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) * @cpu: the CPU whose utilization is requested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) * @p: the task whose utilization should be discounted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463) * The utilization of a CPU is defined by the utilization of tasks currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) * enqueued on that CPU as well as tasks which are currently sleeping after an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) * execution on that CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) * This method returns the utilization of the specified CPU by discounting the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468) * utilization of the specified task, whenever the task is currently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) * contributing to the CPU utilization.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) static unsigned long cpu_util_without(int cpu, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) unsigned int util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) /* Task has no contribution or is new */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477) if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) return cpu_util(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) cfs_rq = &cpu_rq(cpu)->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481) util = READ_ONCE(cfs_rq->avg.util_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) /* Discount task's util from CPU's util */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484) lsub_positive(&util, task_util(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487) * Covered cases:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) * a) if *p is the only task sleeping on this CPU, then:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) * cpu_util (== task_util) > util_est (== 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491) * and thus we return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) * cpu_util_without = (cpu_util - task_util) = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) * b) if other tasks are SLEEPING on this CPU, which is now exiting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495) * IDLE, then:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496) * cpu_util >= task_util
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497) * cpu_util > util_est (== 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498) * and thus we discount *p's blocked utilization to return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) * cpu_util_without = (cpu_util - task_util) >= 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) * c) if other tasks are RUNNABLE on that CPU and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502) * util_est > cpu_util
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) * then we use util_est since it returns a more restrictive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) * estimation of the spare capacity on that CPU, by just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) * considering the expected utilization of tasks already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506) * runnable on that CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508) * Cases a) and b) are covered by the above code, while case c) is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509) * covered by the following code when estimated utilization is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) * enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512) if (sched_feat(UTIL_EST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) unsigned int estimated =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514) READ_ONCE(cfs_rq->avg.util_est.enqueued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) * Despite the following checks we still have a small window
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) * for a possible race, when an execl's select_task_rq_fair()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519) * races with LB's detach_task():
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) * detach_task()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522) * p->on_rq = TASK_ON_RQ_MIGRATING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) * ---------------------------------- A
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524) * deactivate_task() \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525) * dequeue_task() + RaceTime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) * util_est_dequeue() /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527) * ---------------------------------- B
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529) * The additional check on "current == p" is required to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) * properly fix the execl regression and it helps in further
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531) * reducing the chances of the above race.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) if (unlikely(task_on_rq_queued(p) || current == p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534) lsub_positive(&estimated, _task_util_est(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536) util = max(util, estimated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540) * Utilization (estimated) can exceed the CPU capacity, thus let's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541) * clamp to the maximum CPU capacity to ensure consistency with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) * the cpu_util call.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) return min_t(unsigned long, util, capacity_orig_of(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) }
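/*
 * Illustrative example for case c) above (hypothetical numbers, assuming
 * UTIL_EST is enabled): @p is sleeping on @cpu, cfs_rq->avg.util_avg == 700
 * and task_util(p) == 200, so the blocked-utilization estimate drops to 500;
 * if the runnable tasks' util_est.enqueued is 550, the function returns
 * max(500, 550) = 550, the more restrictive estimate.
 */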
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548) * Predicts what cpu_util(@cpu) would return if @p was migrated (and enqueued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549) * to @dst_cpu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551) static unsigned long cpu_util_next(int cpu, struct task_struct *p, int dst_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553) struct cfs_rq *cfs_rq = &cpu_rq(cpu)->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554) unsigned long util_est, util = READ_ONCE(cfs_rq->avg.util_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557) * If @p migrates from @cpu to another, remove its contribution. Or,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558) * if @p migrates from another CPU to @cpu, add its contribution. In
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) * the other cases, @cpu is not impacted by the migration, so the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560) * util_avg should already be correct.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562) if (task_cpu(p) == cpu && dst_cpu != cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) sub_positive(&util, task_util(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564) else if (task_cpu(p) != cpu && dst_cpu == cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565) util += task_util(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567) if (sched_feat(UTIL_EST)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) util_est = READ_ONCE(cfs_rq->avg.util_est.enqueued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) * During wake-up, the task isn't enqueued yet and doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572) * appear in the cfs_rq->avg.util_est.enqueued of any rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573) * so just add it (if needed) to "simulate" what will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574) * cpu_util() after the task has been enqueued.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576) if (dst_cpu == cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577) util_est += _task_util_est(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579) util = max(util, util_est);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582) return min(util, capacity_orig_of(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583) }
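/*
 * Illustrative example (hypothetical numbers, ignoring UTIL_EST for brevity):
 * with cfs_rq->avg.util_avg == 400 and task_util(p) == 100, cpu_util_next()
 * returns ~300 when @p leaves @cpu (task_cpu(p) == cpu, dst_cpu != cpu),
 * ~500 when @p migrates to @cpu (dst_cpu == cpu), and ~400 when @cpu is not
 * involved in the migration.
 */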
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586) * compute_energy(): Estimates the energy that @pd would consume if @p was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587) * migrated to @dst_cpu. compute_energy() predicts what will be the utilization
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588) * landscape of @pd's CPUs after the task migration, and uses the Energy Model
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589) * to compute what would be the energy if we decided to actually migrate that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590) * task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) static long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) compute_energy(struct task_struct *p, int dst_cpu, struct perf_domain *pd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) struct cpumask *pd_mask = perf_domain_span(pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596) unsigned long cpu_cap = arch_scale_cpu_capacity(cpumask_first(pd_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597) unsigned long max_util = 0, sum_util = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) unsigned long energy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602) * The capacity state of CPUs of the current rd can be driven by CPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603) * of another rd if they belong to the same pd. So, account for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604) * utilization of these CPUs too by masking pd with cpu_online_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605) * instead of the rd span.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607) * If an entire pd is outside of the current rd, it will not appear in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608) * its pd list and will not be accounted by compute_energy().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) for_each_cpu_and(cpu, pd_mask, cpu_online_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611) unsigned long cpu_util, util_cfs = cpu_util_next(cpu, p, dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) struct task_struct *tsk = cpu == dst_cpu ? p : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615) * Busy time computation: utilization clamping is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616) * required since the ratio (sum_util / cpu_capacity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617) * is already enough to scale the EM reported power
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618) * consumption at the (possibly clamped) cpu_capacity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620) sum_util += schedutil_cpu_util(cpu, util_cfs, cpu_cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621) ENERGY_UTIL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624) * Performance domain frequency: utilization clamping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) * must be considered since it affects the selection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626) * of the performance domain frequency.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) * NOTE: in case RT tasks are running, by default the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) * FREQUENCY_UTIL's utilization can be max OPP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630) cpu_util = schedutil_cpu_util(cpu, util_cfs, cpu_cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) FREQUENCY_UTIL, tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632) max_util = max(max_util, cpu_util);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635) trace_android_vh_em_cpu_energy(pd->em_pd, max_util, sum_util, &energy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636) if (!energy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) energy = em_cpu_energy(pd->em_pd, max_util, sum_util);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639) return energy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640) }
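/*
 * Rough sketch of how em_cpu_energy() uses these two inputs (see its
 * definition for the authoritative version): max_util selects the
 * performance state, i.e. the frequency follows the busiest CPU of the
 * domain, and the returned energy then scales linearly with sum_util at
 * that performance state's cost (roughly ps->cost * sum_util / scale_cpu).
 */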
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) * find_energy_efficient_cpu(): Find most energy-efficient target CPU for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644) * waking task. find_energy_efficient_cpu() looks for the CPU with maximum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645) * spare capacity in each performance domain and uses it as a potential
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) * candidate to execute the task. Then, it uses the Energy Model to figure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647) * out which of the CPU candidates is the most energy-efficient.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649) * The rationale for this heuristic is as follows. In a performance domain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) * all the most energy efficient CPU candidates (according to the Energy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651) * Model) are those for which we'll request a low frequency. When there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652) * several CPUs for which the frequency request will be the same, we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) * have enough data to break the tie between them, because the Energy Model
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654) * only includes active power costs. With this model, if we assume that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655) * frequency requests follow utilization (e.g. using schedutil), the CPU with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) * the maximum spare capacity in a performance domain is guaranteed to be among
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) * the best candidates of the performance domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659) * In practice, it could be preferable from an energy standpoint to pack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660) * small tasks on a CPU in order to let other CPUs go in deeper idle states,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661) * but that could also hurt our chances to go cluster idle, and we have no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662) * ways to tell with the current Energy Model if this is actually a good
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) * idea or not. So, find_energy_efficient_cpu() basically favors
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664) * cluster-packing, and spreading inside a cluster. That should at least be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) * a good thing for latency, and this is consistent with the idea that most
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666) * of the energy savings of EAS come from the asymmetry of the system, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) * not so much from breaking the tie between identical CPUs. That's also the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668) * reason why EAS is enabled in the topology code only for systems where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669) * SD_ASYM_CPUCAPACITY is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671) * NOTE: Forkees are not accepted in the energy-aware wake-up path because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672) * they don't have any useful utilization data yet and it's not possible to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673) * forecast their impact on energy consumption. Consequently, they will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) * placed by find_idlest_cpu() on the least loaded CPU, which might turn out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) * to be energy-inefficient in some use-cases. The alternative would be to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676) * bias new tasks towards specific types of CPUs first, or to try to infer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677) * their util_avg from the parent task, but those heuristics could hurt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) * other use-cases too. So, until someone finds a better way to solve this,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679) * let's keep things simple by re-using the existing slow path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu, int sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683) unsigned long prev_delta = ULONG_MAX, best_delta = ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685) int max_spare_cap_cpu_ls = prev_cpu, best_idle_cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686) unsigned long max_spare_cap_ls = 0, target_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) unsigned long cpu_cap, util, base_energy = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688) bool boosted, latency_sensitive = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689) unsigned int min_exit_lat = UINT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) int cpu, best_energy_cpu = prev_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691) struct cpuidle_state *idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) struct perf_domain *pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694) int new_cpu = INT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) sync_entity_load_avg(&p->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697) trace_android_rvh_find_energy_efficient_cpu(p, prev_cpu, sync, &new_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698) if (new_cpu != INT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) return new_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702) pd = rcu_dereference(rd->pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) if (!pd || READ_ONCE(rd->overutilized))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) cpu = smp_processor_id();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) if (sync && cpu_rq(cpu)->nr_running == 1 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708) cpumask_test_cpu(cpu, p->cpus_ptr) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709) task_fits_capacity(p, capacity_of(cpu))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711) return cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) * Energy-aware wake-up happens on the lowest sched_domain starting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716) * from sd_asym_cpucapacity spanning over this_cpu and prev_cpu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718) sd = rcu_dereference(*this_cpu_ptr(&sd_asym_cpucapacity));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719) while (sd && !cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720) sd = sd->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6721) if (!sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6722) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6724) if (!task_util_est(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6725) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6727) latency_sensitive = uclamp_latency_sensitive(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6728) boosted = uclamp_boosted(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6729) target_cap = boosted ? 0 : ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6731) for (; pd; pd = pd->next) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6732) unsigned long cur_delta, spare_cap, max_spare_cap = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6733) unsigned long base_energy_pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6734) int max_spare_cap_cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6736) /* Compute the 'base' energy of the pd, without @p */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6737) base_energy_pd = compute_energy(p, -1, pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6738) base_energy += base_energy_pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6740) for_each_cpu_and(cpu, perf_domain_span(pd), sched_domain_span(sd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6741) if (!cpumask_test_cpu(cpu, p->cpus_ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6742) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6744) util = cpu_util_next(cpu, p, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6745) cpu_cap = capacity_of(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6746) spare_cap = cpu_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6747) lsub_positive(&spare_cap, util);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6749) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6750) * Skip CPUs that cannot satisfy the capacity request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6751) * IOW, placing the task there would make the CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6752) * overutilized. Take uclamp into account to see how
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6753) * much capacity we can get out of the CPU; this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6754) * aligned with schedutil_cpu_util().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6755) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6756) util = uclamp_rq_util_with(cpu_rq(cpu), util, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6757) if (!fits_capacity(util, cpu_cap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6758) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6760) /* Always use prev_cpu as a candidate. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6761) if (!latency_sensitive && cpu == prev_cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6762) prev_delta = compute_energy(p, prev_cpu, pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6763) prev_delta -= base_energy_pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6764) best_delta = min(best_delta, prev_delta);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6765) if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6766) if (prev_delta == best_delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6767) best_energy_cpu = prev_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6771) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6772) * Find the CPU with the maximum spare capacity in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6773) * the performance domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6774) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6775) if (spare_cap > max_spare_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6776) max_spare_cap = spare_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6777) max_spare_cap_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6780) if (!latency_sensitive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6781) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6783) if (idle_cpu(cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6784) cpu_cap = capacity_orig_of(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6785) if (boosted && cpu_cap < target_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6786) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6787) if (!boosted && cpu_cap > target_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6788) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6789) idle = idle_get_state(cpu_rq(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6790) if (idle && idle->exit_latency > min_exit_lat &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6791) cpu_cap == target_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6792) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6794) if (idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6795) min_exit_lat = idle->exit_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6796) target_cap = cpu_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6797) best_idle_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6798) } else if (spare_cap > max_spare_cap_ls) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6799) max_spare_cap_ls = spare_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6800) max_spare_cap_cpu_ls = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6802) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6804) /* Evaluate the energy impact of using this CPU. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6805) if (!latency_sensitive && max_spare_cap_cpu >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6806) max_spare_cap_cpu != prev_cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6807) cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6808) cur_delta -= base_energy_pd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6809) if (cur_delta < best_delta) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6810) best_delta = cur_delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6811) best_energy_cpu = max_spare_cap_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6815) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6816) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6818) if (latency_sensitive)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6819) return best_idle_cpu >= 0 ? best_idle_cpu : max_spare_cap_cpu_ls;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6821) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6822) * Pick the best CPU if prev_cpu cannot be used, or if it saves at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6823) * ~6% (1/16) of the total estimated energy with the task placed on prev_cpu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6824) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6825) if (prev_delta == ULONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6826) return best_energy_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6829) * When ROCKCHIP_PERFORMANCE_LOW is selected:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6830) * pick best_energy_cpu immediately if prev_cpu is a big CPU and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6831) * best_energy_cpu is a little CPU, so that tasks can migrate from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6832) * big CPUs to little CPUs more easily to save power.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6833) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6834) if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6835) struct cpumask *cpul_mask = rockchip_perf_get_cpul_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6836) struct cpumask *cpub_mask = rockchip_perf_get_cpub_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6837) int level = rockchip_perf_get_level();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6839) if ((level == ROCKCHIP_PERFORMANCE_LOW) && cpul_mask &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6840) cpub_mask && cpumask_test_cpu(prev_cpu, cpub_mask) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6841) cpumask_test_cpu(best_energy_cpu, cpul_mask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6842) return best_energy_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6844) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6845)
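	/*
	 * Worked example with hypothetical numbers: prev_delta == 800,
	 * best_delta == 700, base_energy == 8000. The threshold below is
	 * (800 + 8000) >> 4 == 550; the saving 800 - 700 == 100 does not
	 * exceed it, so prev_cpu is kept even though best_energy_cpu is
	 * slightly cheaper.
	 */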
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6846) if ((prev_delta - best_delta) > ((prev_delta + base_energy) >> 4))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6847) return best_energy_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6849) return prev_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6851) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6852) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6854) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6857) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6858) * select_task_rq_fair: Select target runqueue for the waking task in domains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6859) * that have the 'sd_flag' flag set. In practice, this is SD_BALANCE_WAKE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6860) * SD_BALANCE_FORK, or SD_BALANCE_EXEC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6861) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6862) * Balances load by selecting the idlest CPU in the idlest group, or under
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6863) * certain conditions an idle sibling CPU if the domain has SD_WAKE_AFFINE set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6864) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6865) * Returns the target CPU number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6866) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6867) * preempt must be disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6868) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6869) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6870) select_task_rq_fair(struct task_struct *p, int prev_cpu, int sd_flag, int wake_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6871) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6872) struct sched_domain *tmp, *sd = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6873) int cpu = smp_processor_id();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6874) int new_cpu = prev_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6875) int want_affine = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6876) int sync = (wake_flags & WF_SYNC) && !(current->flags & PF_EXITING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6877) int target_cpu = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6879) if (trace_android_rvh_select_task_rq_fair_enabled() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6880) !(sd_flag & SD_BALANCE_FORK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6881) sync_entity_load_avg(&p->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6882) trace_android_rvh_select_task_rq_fair(p, prev_cpu, sd_flag,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6883) wake_flags, &target_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6884) if (target_cpu >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6885) return target_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6887) if (sd_flag & SD_BALANCE_WAKE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6888) record_wakee(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6890) if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6891) if (rockchip_perf_get_level() == ROCKCHIP_PERFORMANCE_HIGH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6892) goto no_eas;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6895) if (sched_energy_enabled()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6896) new_cpu = find_energy_efficient_cpu(p, prev_cpu, sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6897) if (new_cpu >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6898) return new_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6899) new_cpu = prev_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6902) no_eas:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6903) want_affine = !wake_wide(p) && cpumask_test_cpu(cpu, p->cpus_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6906) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6907) for_each_domain(cpu, tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6908) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6909) * If both 'cpu' and 'prev_cpu' are part of this domain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6910) * cpu is a valid SD_WAKE_AFFINE target.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6911) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6912) if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6913) cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6914) if (cpu != prev_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6915) new_cpu = wake_affine(tmp, p, cpu, prev_cpu, sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6917) sd = NULL; /* Prefer wake_affine over balance flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6918) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6921) if (tmp->flags & sd_flag)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6922) sd = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6923) else if (!want_affine)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6924) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6927) if (unlikely(sd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6928) /* Slow path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6929) new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6930) } else if (sd_flag & SD_BALANCE_WAKE) { /* XXX always ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6931) /* Fast path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6933) new_cpu = select_idle_sibling(p, prev_cpu, new_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6935) if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6936) struct root_domain *rd = cpu_rq(cpu)->rd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6937) struct cpumask *cpul_mask = rockchip_perf_get_cpul_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6938) struct cpumask *cpub_mask = rockchip_perf_get_cpub_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6939) int level = rockchip_perf_get_level();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6941) if ((level == ROCKCHIP_PERFORMANCE_HIGH) && !READ_ONCE(rd->overutilized) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6942) cpul_mask && cpub_mask && cpumask_intersects(p->cpus_ptr, cpub_mask) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6943) cpumask_test_cpu(new_cpu, cpul_mask)) {
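				/*
				 * Walk up to the widest sched domain so that
				 * find_idlest_cpu() below can also consider
				 * the big cluster when the fast path picked a
				 * little CPU.
				 */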
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6944) for_each_domain(cpu, tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6945) sd = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6947) if (sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6948) new_cpu = find_idlest_cpu(sd, p, cpu, prev_cpu, sd_flag);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6949) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6952) if (want_affine)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6953) current->recent_used_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6955) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6957) return new_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6960) static void detach_entity_cfs_rq(struct sched_entity *se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6962) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6963) * Called immediately before a task is migrated to a new CPU; task_cpu(p) and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6964) * cfs_rq_of(p) references at time of call are still valid and identify the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6965) * previous CPU. The caller guarantees p->pi_lock or task_rq(p)->lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6966) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6967) static void migrate_task_rq_fair(struct task_struct *p, int new_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6968) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6969) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6970) * As blocked tasks retain absolute vruntime the migration needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6971) * deal with this by subtracting the old and adding the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6972) * min_vruntime -- the latter is done by enqueue_entity() when placing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6973) * the task on the new runqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6974) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6975) if (p->state == TASK_WAKING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6976) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6977) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6978) u64 min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6980) #ifndef CONFIG_64BIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6981) u64 min_vruntime_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6982)
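		/*
		 * On 32-bit a 64-bit load can tear, so pair with the writer
		 * (which updates min_vruntime, issues smp_wmb(), then updates
		 * min_vruntime_copy) and retry until both reads agree.
		 */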
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6983) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6984) min_vruntime_copy = cfs_rq->min_vruntime_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6985) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6986) min_vruntime = cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6987) } while (min_vruntime != min_vruntime_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6988) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6989) min_vruntime = cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6990) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6992) se->vruntime -= min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6995) if (p->on_rq == TASK_ON_RQ_MIGRATING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6996) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6997) * In case of TASK_ON_RQ_MIGRATING we in fact hold the 'old'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6998) * rq->lock and can modify state directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6999) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7000) lockdep_assert_held(&task_rq(p)->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7001) detach_entity_cfs_rq(&p->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7003) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7004) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7005) * We are supposed to update the task to "current" time, so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7006) * it is up to date and ready to go to the new CPU/cfs_rq. But we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7007) * have difficulty getting what the current time is, so simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7008) * throw away the out-of-date time. This will result in the wakee
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7009) * task being less decayed, but giving the wakee more load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7010) * sounds not bad.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7011) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7012) remove_entity_load_avg(&p->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7013) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7015) /* Tell new CPU we are migrated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7016) p->se.avg.last_update_time = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7018) /* We have migrated, no longer consider this task hot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7019) p->se.exec_start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7021) update_scan_period(p, new_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7024) static void task_dead_fair(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7025) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7026) remove_entity_load_avg(&p->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7029) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7030) balance_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7031) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7032) if (rq->nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7033) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7035) return newidle_balance(rq, rf) != 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7037) #endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7039) static unsigned long wakeup_gran(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7040) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7041) unsigned long gran = sysctl_sched_wakeup_granularity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7043) /*
	 * Since it's curr that is running now, convert the gran from
	 * real-time to virtual-time in its units.
	 *
	 * By using 'se' instead of 'curr' we penalize light tasks, so
	 * they get preempted easier. That is, if 'se' < 'curr' then
	 * the resulting gran will be larger, therefore penalizing the
	 * lighter task; if OTOH 'se' > 'curr' then the resulting gran
	 * will be smaller, again penalizing the lighter task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7052) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7053) * This is especially important for buddies when the leftmost
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7054) * task is higher priority than the buddy.
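	 *
	 * A worked example with made-up numbers: with a 1ms wakeup
	 * granularity, a nice-0 'se' gets a virtual gran of roughly 1ms,
	 * while an 'se' with half the nice-0 weight gets roughly 2ms,
	 * because calc_delta_fair() scales the delta by
	 * NICE_0_LOAD / se->load.weight. The lighter wakee therefore
	 * needs a bigger vruntime lead before it is allowed to preempt.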
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7055) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7056) return calc_delta_fair(gran, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7059) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7060) * Should 'se' preempt 'curr'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7061) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7062) * |s1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7063) * |s2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7064) * |s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7065) * g
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7066) * |<--->|c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7067) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7068) * w(c, s1) = -1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7069) * w(c, s2) = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7070) * w(c, s3) = 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7071) *
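 * A worked example with made-up numbers: with a virtual gran 'g' of
 * 1ms, a wakee whose vruntime is at or ahead of curr's (vdiff <= 0,
 * like s1) yields -1; one trailing curr by 0.5ms (like s2) yields 0;
 * one trailing by 2ms (like s3) yields 1 and causes preemption.
 *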
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7072) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7073) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7074) wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7075) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7076) s64 gran, vdiff = curr->vruntime - se->vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7078) if (vdiff <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7079) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7081) gran = wakeup_gran(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7082) if (vdiff > gran)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7083) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7085) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7086) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7088) static void set_last_buddy(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7089) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7090) if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7091) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7093) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7094) if (SCHED_WARN_ON(!se->on_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7095) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7096) cfs_rq_of(se)->last = se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7097) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7100) static void set_next_buddy(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7101) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7102) if (entity_is_task(se) && unlikely(task_has_idle_policy(task_of(se))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7103) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7105) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7106) if (SCHED_WARN_ON(!se->on_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7107) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7108) cfs_rq_of(se)->next = se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7112) static void set_skip_buddy(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7114) for_each_sched_entity(se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7115) cfs_rq_of(se)->skip = se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7118) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7119) * Preempt the current task with a newly woken task if needed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7121) static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7123) struct task_struct *curr = rq->curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7124) struct sched_entity *se = &curr->se, *pse = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7125) struct cfs_rq *cfs_rq = task_cfs_rq(curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7126) int scale = cfs_rq->nr_running >= sched_nr_latency;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7127) int next_buddy_marked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7128) bool preempt = false, nopreempt = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7130) if (unlikely(se == pse))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7131) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7133) /*
	 * This is possible from callers such as attach_tasks(), in which we
	 * unconditionally check_preempt_curr() after an enqueue (which may have
	 * led to a throttle). This both saves work and prevents false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7137) * next-buddy nomination below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7138) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7139) if (unlikely(throttled_hierarchy(cfs_rq_of(pse))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7140) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7142) if (sched_feat(NEXT_BUDDY) && scale && !(wake_flags & WF_FORK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7143) set_next_buddy(pse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7144) next_buddy_marked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7147) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7148) * We can come here with TIF_NEED_RESCHED already set from new task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7149) * wake up path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7150) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7151) * Note: this also catches the edge-case of curr being in a throttled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7152) * group (e.g. via set_curr_task), since update_curr() (in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7153) * enqueue of curr) will have resulted in resched being set. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7154) * prevents us from potentially nominating it as a false LAST_BUDDY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7155) * below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7156) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7157) if (test_tsk_need_resched(curr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7158) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7160) /* Idle tasks are by definition preempted by non-idle tasks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7161) if (unlikely(task_has_idle_policy(curr)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7162) likely(!task_has_idle_policy(p)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7163) goto preempt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7165) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7166) * Batch and idle tasks do not preempt non-idle tasks (their preemption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7167) * is driven by the tick):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7169) if (unlikely(p->policy != SCHED_NORMAL) || !sched_feat(WAKEUP_PREEMPTION))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7170) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7172) find_matching_se(&se, &pse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7173) update_curr(cfs_rq_of(se));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7174) trace_android_rvh_check_preempt_wakeup(rq, p, &preempt, &nopreempt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7175) wake_flags, se, pse, next_buddy_marked, sysctl_sched_wakeup_granularity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7176) if (preempt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7177) goto preempt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7178) if (nopreempt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7179) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7180) BUG_ON(!pse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7181) if (wakeup_preempt_entity(se, pse) == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7183) * Bias pick_next to pick the sched entity that is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7184) * triggering this preemption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7185) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7186) if (!next_buddy_marked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7187) set_next_buddy(pse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7188) goto preempt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7191) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7193) preempt:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7194) resched_curr(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7195) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7196) * Only set the backward buddy when the current task is still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7197) * on the rq. This can happen when a wakeup gets interleaved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7198) * with schedule on the ->pre_schedule() or idle_balance()
	 * point, either of which can drop the rq lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7200) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7201) * Also, during early boot the idle thread is in the fair class,
	 * for obvious reasons it's a bad idea to schedule back to it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7203) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7204) if (unlikely(!se->on_rq || curr == rq->idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7205) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7207) if (sched_feat(LAST_BUDDY) && scale && entity_is_task(se))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7208) set_last_buddy(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7211) struct task_struct *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7212) pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7214) struct cfs_rq *cfs_rq = &rq->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7215) struct sched_entity *se = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7216) struct task_struct *p = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7217) int new_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7218) bool repick = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7220) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7221) if (!sched_fair_runnable(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7222) goto idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7224) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7225) if (!prev || prev->sched_class != &fair_sched_class)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7226) goto simple;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7229) * Because of the set_next_buddy() in dequeue_task_fair() it is rather
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7230) * likely that a next task is from the same cgroup as the current.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7231) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7232) * Therefore attempt to avoid putting and setting the entire cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7233) * hierarchy, only change the part that actually changes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7234) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7236) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7237) struct sched_entity *curr = cfs_rq->curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7239) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7240) * Since we got here without doing put_prev_entity() we also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7241) * have to consider cfs_rq->curr. If it is still a runnable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7242) * entity, update_curr() will update its vruntime, otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7243) * forget we've ever seen it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7244) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7245) if (curr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7246) if (curr->on_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7247) update_curr(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7248) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7249) curr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7251) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7252) * This call to check_cfs_rq_runtime() will do the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7253) * throttle and dequeue its entity in the parent(s).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7254) * Therefore the nr_running test will indeed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7255) * be correct.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7257) if (unlikely(check_cfs_rq_runtime(cfs_rq))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7258) cfs_rq = &rq->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7260) if (!cfs_rq->nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7261) goto idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7263) goto simple;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7267) se = pick_next_entity(cfs_rq, curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7268) cfs_rq = group_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7269) } while (cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7271) p = task_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7272) trace_android_rvh_replace_next_task_fair(rq, &p, &se, &repick, false, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7273) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7274) * Since we haven't yet done put_prev_entity and if the selected task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7275) * is a different task than we started out with, try and touch the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7276) * least amount of cfs_rqs.
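	 *
	 * For example (hypothetical hierarchy): if prev ran in cgroup /A/B
	 * and the new pick comes from /A/C, the depth-matching loop below
	 * only put_prev()s entities up to and including the /A/B branch and
	 * set_next()s entities up to and including the /A/C branch; the /A
	 * entity they share (and anything above it) is left untouched.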
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7277) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7278) if (prev != p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7279) struct sched_entity *pse = &prev->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7281) while (!(cfs_rq = is_same_group(se, pse))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7282) int se_depth = se->depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7283) int pse_depth = pse->depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7285) if (se_depth <= pse_depth) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7286) put_prev_entity(cfs_rq_of(pse), pse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7287) pse = parent_entity(pse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7289) if (se_depth >= pse_depth) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7290) set_next_entity(cfs_rq_of(se), se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7291) se = parent_entity(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7295) put_prev_entity(cfs_rq, pse);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7296) set_next_entity(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7298)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7299) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7300) simple:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7301) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7302) if (prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7303) put_prev_task(rq, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7305) trace_android_rvh_replace_next_task_fair(rq, &p, &se, &repick, true, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7306) if (repick) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7307) for_each_sched_entity(se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7308) set_next_entity(cfs_rq_of(se), se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7309) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7312) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7313) se = pick_next_entity(cfs_rq, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7314) set_next_entity(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7315) cfs_rq = group_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7316) } while (cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7318) p = task_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7320) done: __maybe_unused;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7321) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7322) /*
	 * Move the next running task to the front of
	 * the list, so our cfs_tasks list becomes an
	 * MRU (most recently used) one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7326) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7327) list_move(&p->se.group_node, &rq->cfs_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7328) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7330) if (hrtick_enabled(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7331) hrtick_start_fair(rq, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7333) update_misfit_status(p, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7335) return p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7337) idle:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7338) if (!rf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7339) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7341) new_tasks = newidle_balance(rq, rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7344) * Because newidle_balance() releases (and re-acquires) rq->lock, it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7345) * possible for any higher priority task to appear. In that case we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7346) * must re-start the pick_next_entity() loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7347) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7348) if (new_tasks < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7349) return RETRY_TASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7351) if (new_tasks > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7352) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7354) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7355) * rq is about to be idle, check if we need to update the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7356) * lost_idle_time of clock_pelt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7357) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7358) update_idle_rq_clock_pelt(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7360) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7363) static struct task_struct *__pick_next_task_fair(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7364) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7365) return pick_next_task_fair(rq, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7368) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7369) * Account for a descheduled task:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7370) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7371) static void put_prev_task_fair(struct rq *rq, struct task_struct *prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7372) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7373) struct sched_entity *se = &prev->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7374) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7376) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7377) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7378) put_prev_entity(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7382) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7383) * sched_yield() is very simple
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7384) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7385) * The magic of dealing with the ->skip buddy is in pick_next_entity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7386) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7387) static void yield_task_fair(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7388) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7389) struct task_struct *curr = rq->curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7390) struct cfs_rq *cfs_rq = task_cfs_rq(curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7391) struct sched_entity *se = &curr->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7393) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7394) * Are we the only task in the tree?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7395) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7396) if (unlikely(rq->nr_running == 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7397) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7399) clear_buddies(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7401) if (curr->policy != SCHED_BATCH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7402) update_rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7403) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7404) * Update run-time statistics of the 'current'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7405) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7406) update_curr(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7407) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7408) * Tell update_rq_clock() that we've just updated,
		 * so we don't do a microscopic update in schedule()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7410) * and double the fastpath cost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7411) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7412) rq_clock_skip_update(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7415) set_skip_buddy(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7418) static bool yield_to_task_fair(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7420) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7422) /* throttled hierarchies are not runnable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7423) if (!se->on_rq || throttled_hierarchy(cfs_rq_of(se)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7424) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7425)
	/* Tell the scheduler that we'd really like 'se' to run next. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7427) set_next_buddy(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7429) yield_task_fair(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7431) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7434) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7435) /**************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7436) * Fair scheduling class load-balancing methods.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7437) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7438) * BASICS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7439) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7440) * The purpose of load-balancing is to achieve the same basic fairness the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7441) * per-CPU scheduler provides, namely provide a proportional amount of compute
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7442) * time to each task. This is expressed in the following equation:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7443) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7444) * W_i,n/P_i == W_j,n/P_j for all i,j (1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7445) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7446) * Where W_i,n is the n-th weight average for CPU i. The instantaneous weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7447) * W_i,0 is defined as:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7448) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7449) * W_i,0 = \Sum_j w_i,j (2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7450) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7451) * Where w_i,j is the weight of the j-th runnable task on CPU i. This weight
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7452) * is derived from the nice value as per sched_prio_to_weight[].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7453) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7454) * The weight average is an exponential decay average of the instantaneous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7455) * weight:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7456) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7457) * W'_i,n = (2^n - 1) / 2^n * W_i,n + 1 / 2^n * W_i,0 (3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7458) *
 * C_i is the compute capacity of CPU i; typically it is the
 * fraction of 'recent' time available for SCHED_OTHER task execution. But it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7461) * can also include other factors [XXX].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7462) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7463) * To achieve this balance we define a measure of imbalance which follows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7464) * directly from (1):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7465) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7466) * imb_i,j = max{ avg(W/C), W_i/C_i } - min{ avg(W/C), W_j/C_j } (4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7467) *
 * We then move tasks around to minimize the imbalance. In the continuous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7469) * function space it is obvious this converges, in the discrete case we get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7470) * a few fun cases generally called infeasible weight scenarios.
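 *
 * A worked example with made-up numbers: with two CPUs where
 * W_1/C_1 = 3 and W_2/C_2 = 1, avg(W/C) = 2, so per (4)
 * imb_1,2 = max{2, 3} - min{2, 1} = 2; moving enough weight from CPU1
 * to CPU2 so that both ratios approach the average drives the
 * imbalance towards 0.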
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7471) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7472) * [XXX expand on:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7473) * - infeasible weights;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7474) * - local vs global optima in the discrete case. ]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7475) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7476) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7477) * SCHED DOMAINS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7478) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7479) * In order to solve the imbalance equation (4), and avoid the obvious O(n^2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7480) * for all i,j solution, we create a tree of CPUs that follows the hardware
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7481) * topology where each level pairs two lower groups (or better). This results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7482) * in O(log n) layers. Furthermore we reduce the number of CPUs going up the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7483) * tree to only the first of the previous level and we decrease the frequency
 * of load-balance at each level inversely proportional to the number of CPUs in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7485) * the groups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7486) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7487) * This yields:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7488) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7489) * log_2 n 1 n
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7490) * \Sum { --- * --- * 2^i } = O(n) (5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7491) * i = 0 2^i 2^i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7492) * `- size of each group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7493) * | | `- number of CPUs doing load-balance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7494) * | `- freq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7495) * `- sum over all levels
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7496) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7497) * Coupled with a limit on how many tasks we can migrate every balance pass,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7498) * this makes (5) the runtime complexity of the balancer.
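 *
 * Plugging a made-up n = 8 into (5) for illustration: the per-level
 * terms are 8, 4, 2 and 1 (i = 0..3), summing to 15 < 2n, which
 * matches the O(n) bound.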
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7499) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7500) * An important property here is that each CPU is still (indirectly) connected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7501) * to every other CPU in at most O(log n) steps:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7502) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7503) * The adjacency matrix of the resulting graph is given by:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7504) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7505) * log_2 n
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7506) * A_i,j = \Union (i % 2^k == 0) && i / 2^(k+1) == j / 2^(k+1) (6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7507) * k = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7508) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7509) * And you'll find that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7510) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7511) * A^(log_2 n)_i,j != 0 for all i,j (7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7512) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7513) * Showing there's indeed a path between every CPU in at most O(log n) steps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7514) * The task movement gives a factor of O(m), giving a convergence complexity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7515) * of:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7516) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7517) * O(nm log n), n := nr_cpus, m := nr_tasks (8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7518) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7519) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7520) * WORK CONSERVING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7521) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7522) * In order to avoid CPUs going idle while there's still work to do, new idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7523) * balancing is more aggressive and has the newly idle CPU iterate up the domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7524) * tree itself instead of relying on other CPUs to bring it work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7525) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7526) * This adds some complexity to both (5) and (8) but it reduces the total idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7527) * time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7528) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7529) * [XXX more?]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7530) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7531) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7532) * CGROUPS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7533) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7534) * Cgroups make a horror show out of (2), instead of a simple sum we get:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7535) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7536) * s_k,i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7537) * W_i,0 = \Sum_j \Prod_k w_k * ----- (9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7538) * S_k
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7539) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7540) * Where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7541) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7542) * s_k,i = \Sum_j w_i,j,k and S_k = \Sum_i s_k,i (10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7543) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7544) * w_i,j,k is the weight of the j-th runnable task in the k-th cgroup on CPU i.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7545) *
 * The big problem is S_k, it's a global sum needed to compute a local (W_i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7547) * property.
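 *
 * As a toy example with made-up numbers: a single group k with weight
 * w_k = 1024, two equally weighted runnable tasks on CPU0 and one on
 * CPU1 gives s_k,0 = 2, s_k,1 = 1 and S_k = 3 (in task-weight units),
 * so per (9) the group contributes ~683 to W_0,0 and ~341 to W_1,0.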
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7548) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7549) * [XXX write more on how we solve this.. _after_ merging pjt's patches that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7550) * rewrite all of this once again.]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7551) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7553) unsigned long __read_mostly max_load_balance_interval = HZ/10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7554) EXPORT_SYMBOL_GPL(max_load_balance_interval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7556) enum fbq_type { regular, remote, all };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7558) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7559) * 'group_type' describes the group of CPUs at the moment of load balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7560) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7561) * The enum is ordered by pulling priority, with the group with lowest priority
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7562) * first so the group_type can simply be compared when selecting the busiest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7563) * group. See update_sd_pick_busiest().
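 *
 * For example, a group classified group_overloaded compares greater
 * than one classified group_has_spare, so a plain numeric comparison
 * of the two enum values is enough to prefer pulling from the more
 * loaded group.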
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7564) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7565) enum group_type {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7566) /* The group has spare capacity that can be used to run more tasks. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7567) group_has_spare = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7568) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7569) * The group is fully used and the tasks don't compete for more CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7570) * cycles. Nevertheless, some tasks might wait before running.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7571) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7572) group_fully_busy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7573) /*
 * SD_ASYM_CPUCAPACITY only: One task doesn't fit the CPU's capacity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7575) * and must be migrated to a more powerful CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7576) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7577) group_misfit_task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7578) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7579) * SD_ASYM_PACKING only: One local CPU with higher capacity is available,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7580) * and the task should be migrated to it instead of running on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7581) * current CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7582) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7583) group_asym_packing,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7584) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7585) * The tasks' affinity constraints previously prevented the scheduler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7586) * from balancing the load across the system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7587) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7588) group_imbalanced,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7589) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7590) * The CPU is overloaded and can't provide expected CPU cycles to all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7591) * tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7592) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7593) group_overloaded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7594) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7596) enum migration_type {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7597) migrate_load = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7598) migrate_util,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7599) migrate_task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7600) migrate_misfit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7601) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7603) #define LBF_ALL_PINNED 0x01
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7604) #define LBF_NEED_BREAK 0x02
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7605) #define LBF_DST_PINNED 0x04
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7606) #define LBF_SOME_PINNED 0x08
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7607) #define LBF_NOHZ_STATS 0x10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7608) #define LBF_NOHZ_AGAIN 0x20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7610) struct lb_env {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7611) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7613) struct rq *src_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7614) int src_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7616) int dst_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7617) struct rq *dst_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7619) struct cpumask *dst_grpmask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7620) int new_dst_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7621) enum cpu_idle_type idle;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7622) long imbalance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7623) /* The set of CPUs under consideration for load-balancing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7624) struct cpumask *cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7626) unsigned int flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7628) unsigned int loop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7629) unsigned int loop_break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7630) unsigned int loop_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7632) enum fbq_type fbq_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7633) enum migration_type migration_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7634) struct list_head tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7635) struct rq_flags *src_rq_rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7636) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7638) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7639) * Is this task likely cache-hot:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7640) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7641) static int task_hot(struct task_struct *p, struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7643) s64 delta;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7645) lockdep_assert_held(&env->src_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7647) if (p->sched_class != &fair_sched_class)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7648) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7650) if (unlikely(task_has_idle_policy(p)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7651) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7652)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7653) /* SMT siblings share cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7654) if (env->sd->flags & SD_SHARE_CPUCAPACITY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7655) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7657) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7658) * Buddy candidates are cache hot:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7659) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7660) if (sched_feat(CACHE_HOT_BUDDY) && env->dst_rq->nr_running &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7661) (&p->se == cfs_rq_of(&p->se)->next ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7662) &p->se == cfs_rq_of(&p->se)->last))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7663) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7665) if (sysctl_sched_migration_cost == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7666) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7667) if (sysctl_sched_migration_cost == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7668) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7669)
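	/*
	 * Otherwise, treat the task as cache hot if it last ran on the
	 * source rq within the migration-cost window; e.g. with a 0.5ms
	 * sysctl_sched_migration_cost (illustrative value), a task that
	 * stopped running 0.2ms ago is considered hot, while one that
	 * stopped 3ms ago is not.
	 */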
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7670) delta = rq_clock_task(env->src_rq) - p->se.exec_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7672) return delta < (s64)sysctl_sched_migration_cost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7675) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7676) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7677) * Returns 1, if task migration degrades locality
 * Returns 0, if task migration improves locality, i.e. migration is preferred.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7679) * Returns -1, if task migration is not affected by locality.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7680) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7681) static int migrate_degrades_locality(struct task_struct *p, struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7682) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7683) struct numa_group *numa_group = rcu_dereference(p->numa_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7684) unsigned long src_weight, dst_weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7685) int src_nid, dst_nid, dist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7687) if (!static_branch_likely(&sched_numa_balancing))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7688) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7690) if (!p->numa_faults || !(env->sd->flags & SD_NUMA))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7691) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7693) src_nid = cpu_to_node(env->src_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7694) dst_nid = cpu_to_node(env->dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7696) if (src_nid == dst_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7697) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7699) /* Migrating away from the preferred node is always bad. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7700) if (src_nid == p->numa_preferred_nid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7701) if (env->src_rq->nr_running > env->src_rq->nr_preferred_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7702) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7703) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7704) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7707) /* Encourage migration to the preferred node. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7708) if (dst_nid == p->numa_preferred_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7709) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7711) /* Leaving a core idle is often worse than degrading locality. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7712) if (env->idle == CPU_IDLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7713) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7715) dist = node_distance(src_nid, dst_nid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7716) if (numa_group) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7717) src_weight = group_weight(p, src_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7718) dst_weight = group_weight(p, dst_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7719) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7720) src_weight = task_weight(p, src_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7721) dst_weight = task_weight(p, dst_nid, dist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7724) return dst_weight < src_weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7727) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7728) static inline int migrate_degrades_locality(struct task_struct *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7729) struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7731) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7733) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7735) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7736) * can_migrate_task - may task p from runqueue rq be migrated to this_cpu?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7737) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7738) static
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7739) int can_migrate_task(struct task_struct *p, struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7740) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7741) int tsk_cache_hot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7742) int can_migrate = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7744) lockdep_assert_held(&env->src_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7746) trace_android_rvh_can_migrate_task(p, env->dst_cpu, &can_migrate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7747) if (!can_migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7748) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7750) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7751) * We do not migrate tasks that are:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7752) * 1) throttled_lb_pair, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7753) * 2) cannot be migrated to this CPU due to cpus_ptr, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7754) * 3) running (obviously), or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7755) * 4) are cache-hot on their current CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7756) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7757) if (throttled_lb_pair(task_group(p), env->src_cpu, env->dst_cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7758) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7760) /* Disregard pcpu kthreads; they are where they need to be. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7761) if (kthread_is_per_cpu(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7762) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7764) if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7765) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7767) schedstat_inc(p->se.statistics.nr_failed_migrations_affine);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7769) env->flags |= LBF_SOME_PINNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7771) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7772) * Remember if this task can be migrated to any other CPU in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7773) * our sched_group. We may want to revisit it if we couldn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7774) * meet load balance goals by pulling other tasks on src_cpu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7775) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7776) * Avoid computing new_dst_cpu for NEWLY_IDLE or if we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7777) * already computed one in current iteration.
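		 *
		 * For instance (hypothetical affinity): if dst_cpu is 1 but
		 * p->cpus_ptr only allows CPUs 2-3 and CPU2 is part of
		 * dst_grpmask, new_dst_cpu is set to 2 below so that a later
		 * pass may retry the pull towards that CPU instead.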
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7778) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7779) if (env->idle == CPU_NEWLY_IDLE || (env->flags & LBF_DST_PINNED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7780) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7781)
		/* Prevent re-selecting dst_cpu via env's CPUs: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7783) for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7784) if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7785) env->flags |= LBF_DST_PINNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7786) env->new_dst_cpu = cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7787) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7789) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7791) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7793)
	/* Record that we found at least one task that could run on dst_cpu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7795) env->flags &= ~LBF_ALL_PINNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7797) if (task_running(env->src_rq, p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7798) schedstat_inc(p->se.statistics.nr_failed_migrations_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7799) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7802) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7803) * Aggressive migration if:
	 * 1) destination numa is preferred,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7805) * 2) task is cache cold, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7806) * 3) too many balance attempts have failed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7807) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7808) tsk_cache_hot = migrate_degrades_locality(p, env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7809) if (tsk_cache_hot == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7810) tsk_cache_hot = task_hot(p, env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7812) if (tsk_cache_hot <= 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7813) env->sd->nr_balance_failed > env->sd->cache_nice_tries) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7814) if (tsk_cache_hot == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7815) schedstat_inc(env->sd->lb_hot_gained[env->idle]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7816) schedstat_inc(p->se.statistics.nr_forced_migrations);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7817) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7818) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7821) schedstat_inc(p->se.statistics.nr_failed_migrations_hot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7822) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7825) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7826) * detach_task() -- detach the task for the migration specified in env
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7827) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7828) static void detach_task(struct task_struct *p, struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7829) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7830) int detached = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7832) lockdep_assert_held(&env->src_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7834) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7835) * The vendor hook may drop the lock temporarily, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7836) * pass the rq flags so it can unpin the lock. We expect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7837) * the rq lock to be held again when the hook returns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7838) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7839) trace_android_rvh_migrate_queued_task(env->src_rq, env->src_rq_rf, p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7840) env->dst_cpu, &detached);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7841) if (detached)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7842) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7844) deactivate_task(env->src_rq, p, DEQUEUE_NOCLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7845) set_task_cpu(p, env->dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7848) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7849) * detach_one_task() -- tries to dequeue exactly one task from env->src_rq, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7850) * part of active balancing operations within "domain".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7851) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7852) * Returns a task if successful and NULL otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7853) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7854) static struct task_struct *detach_one_task(struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7855) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7856) struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7858) lockdep_assert_held(&env->src_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7860) list_for_each_entry_reverse(p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7861) &env->src_rq->cfs_tasks, se.group_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7862) if (!can_migrate_task(p, env))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7863) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7865) detach_task(p, env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7867) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7868) * Right now, this is only the second place where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7869) * lb_gained[env->idle] is updated (other is detach_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7870) * so we can safely collect stats here rather than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7871) * inside detach_tasks().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7872) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7873) schedstat_inc(env->sd->lb_gained[env->idle]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7874) return p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7876) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7878)
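/*
 * Batch size for detach_tasks(): after scanning this many tasks it sets
 * LBF_NEED_BREAK so that load_balance() can drop the rq lock and resume
 * later, bounding how long the lock is held in one go.
 */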
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7879) static const unsigned int sched_nr_migrate_break = 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7881) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7882) * detach_tasks() -- tries to detach up to imbalance load/util/tasks from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7883) * busiest_rq, as part of a balancing operation within domain "sd".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7884) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7885) * Returns number of detached tasks if successful and 0 otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7886) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7887) static int detach_tasks(struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7888) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7889) struct list_head *tasks = &env->src_rq->cfs_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7890) unsigned long util, load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7891) struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7892) int detached = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7894) lockdep_assert_held(&env->src_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7896) if (env->imbalance <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7897) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7899) while (!list_empty(tasks)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7900) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7901) * We don't want to steal all the tasks, otherwise we may be treated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7902) * likewise, which could at worst lead to a livelock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7903) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7904) if (env->idle != CPU_NOT_IDLE && env->src_rq->nr_running <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7905) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7907) p = list_last_entry(tasks, struct task_struct, se.group_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7909) env->loop++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7910) /* We've more or less seen every task there is, call it quits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7911) if (env->loop > env->loop_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7912) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7914) /* take a breather every nr_migrate tasks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7915) if (env->loop > env->loop_break) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7916) env->loop_break += sched_nr_migrate_break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7917) env->flags |= LBF_NEED_BREAK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7918) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7921) if (!can_migrate_task(p, env))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7922) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7924) switch (env->migration_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7925) case migrate_load:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7926) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7927) * Depending on the number of CPUs and tasks and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7928) * cgroup hierarchy, task_h_load() can return zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7929) * Make sure that env->imbalance decreases,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7930) * otherwise detach_tasks() will stop only after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7931) * detaching up to loop_max tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7932) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7933) load = max_t(unsigned long, task_h_load(p), 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7934)
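/*
 * With the LB_MIN feature, skip tasks with a tiny h_load unless
 * balancing has already failed for this domain.
 */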
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7935) if (sched_feat(LB_MIN) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7936) load < 16 && !env->sd->nr_balance_failed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7937) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7939) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7940) * Make sure that we don't migrate too much load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7941) * Nevertheless, let's relax the constraint if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7942) * scheduler fails to find a good waiting task to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7943) * migrate.
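 * (shr_bound() halves the load used in this comparison for each
 *  previous failed attempt, progressively relaxing the constraint.)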
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7944) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7945) if (shr_bound(load, env->sd->nr_balance_failed) > env->imbalance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7946) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7948) env->imbalance -= load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7949) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7951) case migrate_util:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7952) util = task_util_est(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7954) if (util > env->imbalance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7955) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7957) env->imbalance -= util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7958) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7960) case migrate_task:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7961) env->imbalance--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7962) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7964) case migrate_misfit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7965) /* This is not a misfit task */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7966) if (task_fits_capacity(p, capacity_of(env->src_cpu)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7967) goto next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7968)
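/*
 * Misfit balancing moves at most one task per attempt: zeroing the
 * imbalance makes the loop below stop right after this detach.
 */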
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7969) env->imbalance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7970) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7971) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7972)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7973) detach_task(p, env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7974) list_add(&p->se.group_node, &env->tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7976) detached++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7978) #ifdef CONFIG_PREEMPTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7979) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7980) * NEWIDLE balancing is a source of latency, so preemptible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7981) * kernels will stop after the first task is detached to minimize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7982) * the critical section.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7983) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7984) if (env->idle == CPU_NEWLY_IDLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7985) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7986) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7988) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7989) * We only want to steal up to the prescribed amount of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7990) * load/util/tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7991) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7992) if (env->imbalance <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7993) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7995) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7996) next:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7997) list_move(&p->se.group_node, tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8000) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8001) * Right now, this is one of only two places we collect this stat
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8002) * so we can safely collect detach_one_task() stats here rather
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8003) * than inside detach_one_task().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8004) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8005) schedstat_add(env->sd->lb_gained[env->idle], detached);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8007) return detached;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8008) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8010) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8011) * attach_task() -- attach the task detached by detach_task() to its new rq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8012) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8013) static void attach_task(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8014) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8015) lockdep_assert_held(&rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8017) BUG_ON(task_rq(p) != rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8018) activate_task(rq, p, ENQUEUE_NOCLOCK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8019) check_preempt_curr(rq, p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8022) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8023) * attach_one_task() -- attaches the task returned from detach_one_task() to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8024) * its new rq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8025) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8026) static void attach_one_task(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8027) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8028) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8030) rq_lock(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8031) update_rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8032) attach_task(rq, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8033) rq_unlock(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8036) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8037) * attach_tasks() -- attaches all tasks detached by detach_tasks() to their
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8038) * new rq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8039) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8040) static void attach_tasks(struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8041) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8042) struct list_head *tasks = &env->tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8043) struct task_struct *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8044) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8046) rq_lock(env->dst_rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8047) update_rq_clock(env->dst_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8049) while (!list_empty(tasks)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8050) p = list_first_entry(tasks, struct task_struct, se.group_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8051) list_del_init(&p->se.group_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8053) attach_task(env->dst_rq, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8056) rq_unlock(env->dst_rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8059) #ifdef CONFIG_NO_HZ_COMMON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8060) static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8061) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8062) if (cfs_rq->avg.load_avg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8063) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8065) if (cfs_rq->avg.util_avg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8066) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8068) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8071) static inline bool others_have_blocked(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8072) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8073) if (READ_ONCE(rq->avg_rt.util_avg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8074) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8076) if (READ_ONCE(rq->avg_dl.util_avg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8077) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8079) if (thermal_load_avg(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8080) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8082) #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8083) if (READ_ONCE(rq->avg_irq.util_avg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8084) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8085) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8087) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8088) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8090) static inline void update_blocked_load_status(struct rq *rq, bool has_blocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8091) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8092) rq->last_blocked_load_update_tick = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8094) if (!has_blocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8095) rq->has_blocked_load = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8097) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8098) static inline bool cfs_rq_has_blocked(struct cfs_rq *cfs_rq) { return false; }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8099) static inline bool others_have_blocked(struct rq *rq) { return false; }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8100) static inline void update_blocked_load_status(struct rq *rq, bool has_blocked) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8101) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8103) static bool __update_blocked_others(struct rq *rq, bool *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8105) const struct sched_class *curr_class;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8106) u64 now = rq_clock_pelt(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8107) unsigned long thermal_pressure;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8108) bool decayed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8111) * update_load_avg() can call cpufreq_update_util(). Make sure that RT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8112) * DL and IRQ signals have been updated before updating CFS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8114) curr_class = rq->curr->sched_class;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8116) thermal_pressure = arch_scale_thermal_pressure(cpu_of(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8118) decayed = update_rt_rq_load_avg(now, rq, curr_class == &rt_sched_class) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8119) update_dl_rq_load_avg(now, rq, curr_class == &dl_sched_class) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8120) update_thermal_load_avg(rq_clock_thermal(rq), rq, thermal_pressure) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8121) update_irq_load_avg(rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8123) if (others_have_blocked(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8124) *done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8126) return decayed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8129) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8130)
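/*
 * A cfs_rq is fully decayed once it has no weight and all of its PELT
 * sums have reached zero; such a cfs_rq can be dropped from the leaf list.
 */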
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8131) static inline bool cfs_rq_is_decayed(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8132) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8133) if (cfs_rq->load.weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8134) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8136) if (cfs_rq->avg.load_sum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8137) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8139) if (cfs_rq->avg.util_sum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8140) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8142) if (cfs_rq->avg.runnable_sum)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8143) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8145) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8148) static bool __update_blocked_fair(struct rq *rq, bool *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8149) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8150) struct cfs_rq *cfs_rq, *pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8151) bool decayed = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8152) int cpu = cpu_of(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8154) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8155) * Iterates the task_group tree in a bottom up fashion, see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8156) * list_add_leaf_cfs_rq() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8158) for_each_leaf_cfs_rq_safe(rq, cfs_rq, pos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8159) struct sched_entity *se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8161) if (update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8162) update_tg_load_avg(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8164) if (cfs_rq == &rq->cfs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8165) decayed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8168) /* Propagate pending load changes to the parent, if any: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8169) se = cfs_rq->tg->se[cpu];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8170) if (se && !skip_blocked_update(se))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8171) update_load_avg(cfs_rq_of(se), se, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8174) * There can be a lot of idle CPU cgroups. Don't let fully
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8175) * decayed cfs_rqs linger on the list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8176) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8177) if (cfs_rq_is_decayed(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8178) list_del_leaf_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8180) /* Don't need periodic decay once load/util_avg are null */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8181) if (cfs_rq_has_blocked(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8182) *done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8185) return decayed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8186) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8188) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8189) * Compute the hierarchical load factor for cfs_rq and all its ascendants.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8190) * This needs to be done in a top-down fashion because the load of a child
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8191) * group is a fraction of its parent's load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8192) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8193) static void update_cfs_rq_h_load(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8195) struct rq *rq = rq_of(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8196) struct sched_entity *se = cfs_rq->tg->se[cpu_of(rq)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8197) unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8198) unsigned long load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8200) if (cfs_rq->last_h_load_update == now)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8201) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8202)
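/*
 * Pass 1: walk up the hierarchy, recording in h_load_next the path back
 * down to this cfs_rq, and stop at the first level whose h_load is
 * already up to date for this jiffy.
 */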
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8203) WRITE_ONCE(cfs_rq->h_load_next, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8204) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8205) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8206) WRITE_ONCE(cfs_rq->h_load_next, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8207) if (cfs_rq->last_h_load_update == now)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8208) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8211) if (!se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8212) cfs_rq->h_load = cfs_rq_load_avg(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8213) cfs_rq->last_h_load_update = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8215)
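/*
 * Pass 2: walk back down along h_load_next, scaling each parent's h_load
 * by the entity's share of the parent cfs_rq's load (the +1 avoids a
 * division by zero).
 */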
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8216) while ((se = READ_ONCE(cfs_rq->h_load_next)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8217) load = cfs_rq->h_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8218) load = div64_ul(load * se->avg.load_avg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8219) cfs_rq_load_avg(cfs_rq) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8220) cfs_rq = group_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8221) cfs_rq->h_load = load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8222) cfs_rq->last_h_load_update = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8225)
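/*
 * task_h_load() is the task's load as seen from the root: its own
 * load_avg scaled by its cfs_rq's hierarchical load share.
 */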
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8226) static unsigned long task_h_load(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8228) struct cfs_rq *cfs_rq = task_cfs_rq(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8230) update_cfs_rq_h_load(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8231) return div64_ul(p->se.avg.load_avg * cfs_rq->h_load,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8232) cfs_rq_load_avg(cfs_rq) + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8233) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8234) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8235) static bool __update_blocked_fair(struct rq *rq, bool *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8237) struct cfs_rq *cfs_rq = &rq->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8238) bool decayed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8240) decayed = update_cfs_rq_load_avg(cfs_rq_clock_pelt(cfs_rq), cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8241) if (cfs_rq_has_blocked(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8242) *done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8244) return decayed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8247) static unsigned long task_h_load(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8248) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8249) return p->se.avg.load_avg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8250) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8251) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8252)
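/*
 * Decay the blocked (non-runnable) load/util averages of all classes on
 * this CPU and, if anything decayed, notify cpufreq of the new utilization.
 */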
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8253) static void update_blocked_averages(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8255) bool decayed = false, done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8256) struct rq *rq = cpu_rq(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8257) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8259) rq_lock_irqsave(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8260) update_rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8262) decayed |= __update_blocked_others(rq, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8263) decayed |= __update_blocked_fair(rq, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8265) update_blocked_load_status(rq, !done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8266) if (decayed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8267) cpufreq_update_util(rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8268) rq_unlock_irqrestore(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8271) /********** Helpers for find_busiest_group ************************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8273) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8274) * sg_lb_stats - stats of a sched_group required for load_balancing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8275) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8276) struct sg_lb_stats {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8277) unsigned long avg_load; /* Avg load across the CPUs of the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8278) unsigned long group_load; /* Total load over the CPUs of the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8279) unsigned long group_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8280) unsigned long group_util; /* Total utilization over the CPUs of the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8281) unsigned long group_runnable; /* Total runnable time over the CPUs of the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8282) unsigned int sum_nr_running; /* Nr of tasks running in the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8283) unsigned int sum_h_nr_running; /* Nr of CFS tasks running in the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8284) unsigned int idle_cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8285) unsigned int group_weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8286) enum group_type group_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8287) unsigned int group_asym_packing; /* Tasks should be moved to preferred CPU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8288) unsigned long group_misfit_task_load; /* A CPU has a task too big for its capacity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8289) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8290) unsigned int nr_numa_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8291) unsigned int nr_preferred_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8292) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8293) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8295) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8296) * sd_lb_stats - Structure to store the statistics of a sched_domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8297) * during load balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8298) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8299) struct sd_lb_stats {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8300) struct sched_group *busiest; /* Busiest group in this sd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8301) struct sched_group *local; /* Local group in this sd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8302) unsigned long total_load; /* Total load of all groups in sd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8303) unsigned long total_capacity; /* Total capacity of all groups in sd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8304) unsigned long avg_load; /* Average load across all groups in sd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8305) unsigned int prefer_sibling; /* tasks should go to sibling first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8307) struct sg_lb_stats busiest_stat;/* Statistics of the busiest group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8308) struct sg_lb_stats local_stat; /* Statistics of the local group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8309) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8311) static inline void init_sd_lb_stats(struct sd_lb_stats *sds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8312) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8313) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8314) * Skimp on the clearing to avoid duplicate work. We can avoid clearing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8315) * local_stat because update_sg_lb_stats() does a full clear/assignment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8316) * We must however set busiest_stat::group_type and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8317) * busiest_stat::idle_cpus to the worst busiest group because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8318) * update_sd_pick_busiest() reads these before assignment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8319) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8320) *sds = (struct sd_lb_stats){
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8321) .busiest = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8322) .local = NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8323) .total_load = 0UL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8324) .total_capacity = 0UL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8325) .busiest_stat = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8326) .idle_cpus = UINT_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8327) .group_type = group_has_spare,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8328) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8329) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8331)
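/*
 * Capacity left for CFS tasks on this CPU after removing the average
 * pressure from IRQ, RT, DL and thermal activity.
 */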
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8332) static unsigned long scale_rt_capacity(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8333) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8334) struct rq *rq = cpu_rq(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8335) unsigned long max = arch_scale_cpu_capacity(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8336) unsigned long used, free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8337) unsigned long irq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8339) irq = cpu_util_irq(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8341) if (unlikely(irq >= max))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8342) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8344) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8345) * avg_rt.util_avg and avg_dl.util_avg track binary signals
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8346) * (running and not running) with weights 0 and 1024 respectively.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8347) * avg_thermal.load_avg tracks thermal pressure and its weighted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8348) * average uses the capacity delta from the max capacity as the load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8349) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8350) used = READ_ONCE(rq->avg_rt.util_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8351) used += READ_ONCE(rq->avg_dl.util_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8352) used += thermal_load_avg(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8354) if (unlikely(used >= max))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8355) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8357) free = max - used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8359) return scale_irq_capacity(free, irq, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8362) static void update_cpu_capacity(struct sched_domain *sd, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8363) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8364) unsigned long capacity = scale_rt_capacity(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8365) struct sched_group *sdg = sd->groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8367) cpu_rq(cpu)->cpu_capacity_orig = arch_scale_cpu_capacity(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8369) if (!capacity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8370) capacity = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8372) trace_android_rvh_update_cpu_capacity(cpu, &capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8373) cpu_rq(cpu)->cpu_capacity = capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8374) trace_sched_cpu_capacity_tp(cpu_rq(cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8376) sdg->sgc->capacity = capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8377) sdg->sgc->min_capacity = capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8378) sdg->sgc->max_capacity = capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8379) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8381) void update_group_capacity(struct sched_domain *sd, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8382) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8383) struct sched_domain *child = sd->child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8384) struct sched_group *group, *sdg = sd->groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8385) unsigned long capacity, min_capacity, max_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8386) unsigned long interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8388) interval = msecs_to_jiffies(sd->balance_interval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8389) interval = clamp(interval, 1UL, max_load_balance_interval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8390) sdg->sgc->next_update = jiffies + interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8392) if (!child) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8393) update_cpu_capacity(sd, cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8394) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8397) capacity = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8398) min_capacity = ULONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8399) max_capacity = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8401) if (child->flags & SD_OVERLAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8402) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8403) * SD_OVERLAP domains cannot assume that child groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8404) * span the current group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8405) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8407) for_each_cpu(cpu, sched_group_span(sdg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8408) unsigned long cpu_cap = capacity_of(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8410) capacity += cpu_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8411) min_capacity = min(cpu_cap, min_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8412) max_capacity = max(cpu_cap, max_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8414) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8415) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8416) * !SD_OVERLAP domains can assume that child groups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8417) * span the current group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8418) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8420) group = child->groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8421) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8422) struct sched_group_capacity *sgc = group->sgc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8424) capacity += sgc->capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8425) min_capacity = min(sgc->min_capacity, min_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8426) max_capacity = max(sgc->max_capacity, max_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8427) group = group->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8428) } while (group != child->groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8431) sdg->sgc->capacity = capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8432) sdg->sgc->min_capacity = min_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8433) sdg->sgc->max_capacity = max_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8436) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8437) * Check whether the capacity of the rq has been noticeably reduced by side
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8438) * activity. The imbalance_pct is used for the threshold.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8439) * Return true if the capacity is reduced.
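 * (With an imbalance_pct of 117, for instance, this triggers once less
 *  than ~85% of the original capacity is left.)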
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8440) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8441) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8442) check_cpu_capacity(struct rq *rq, struct sched_domain *sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8443) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8444) return ((rq->cpu_capacity * sd->imbalance_pct) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8445) (rq->cpu_capacity_orig * 100));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8449) * Check whether a rq has a misfit task and if it looks like we can actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8450) * help that task: we can migrate the task to a CPU of higher capacity, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8451) * the task's current CPU is heavily pressured.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8452) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8453) static inline int check_misfit_status(struct rq *rq, struct sched_domain *sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8455) return rq->misfit_task_load &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8456) (rq->cpu_capacity_orig < rq->rd->max_cpu_capacity ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8457) check_cpu_capacity(rq, sd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8460) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8461) * Group imbalance indicates (and tries to solve) the problem where balancing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8462) * groups is inadequate due to ->cpus_ptr constraints.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8463) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8464) * Imagine a situation of two groups of 4 CPUs each and 4 tasks each with a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8465) * cpumask covering 1 CPU of the first group and 3 CPUs of the second group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8466) * Something like:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8467) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8468) * { 0 1 2 3 } { 4 5 6 7 }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8469) * * * * *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8470) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8471) * If we were to balance group-wise we'd place two tasks in the first group and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8472) * two tasks in the second group. Clearly this is undesired as it will overload
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8473) * cpu 3 and leave one of the CPUs in the second group unused.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8474) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8475) * The current solution to this issue is detecting the skew in the first group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8476) * by noticing the lower domain failed to reach balance and had difficulty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8477) * moving tasks due to affinity constraints.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8478) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8479) * When this is so detected, this group becomes a candidate for busiest; see
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8480) * update_sd_pick_busiest(). And calculate_imbalance() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8481) * find_busiest_group() avoid some of the usual balance conditions to allow it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8482) * to create an effective group imbalance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8483) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8484) * This is a somewhat tricky proposition since the next run might not find the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8485) * group imbalance and decide the groups need to be balanced again. A most
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8486) * subtle and fragile situation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8489) static inline int sg_imbalanced(struct sched_group *group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8491) return group->sgc->imbalance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8494) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8495) * group_has_capacity returns true if the group has spare capacity that could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8496) * be used by some tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8497) * We consider that a group has spare capacity if the number of tasks is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8498) * smaller than the number of CPUs or if the utilization is lower than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8499) * available capacity for CFS tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8500) * For the latter, we use a threshold to stabilize the state, to take into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8501) * account the variance of the tasks' load and to return true only if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8502) * available capacity is meaningful for the load balancer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8503) * As an example, an available capacity of 1% can appear but it doesn't bring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8504) * any benefit to the load balancer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8505) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8506) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8507) group_has_capacity(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8508) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8509) if (sgs->sum_nr_running < sgs->group_weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8510) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8512) if ((sgs->group_capacity * imbalance_pct) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8513) (sgs->group_runnable * 100))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8514) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8516) if ((sgs->group_capacity * 100) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8517) (sgs->group_util * imbalance_pct))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8518) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8520) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8523) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8524) * group_is_overloaded returns true if the group has more tasks than it can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8525) * handle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8526) * group_is_overloaded is not equal to !group_has_capacity: a group with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8527) * exactly the right number of tasks has no spare capacity left but is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8528) * overloaded either, so both group_has_capacity and group_is_overloaded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8529) * return false.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8530) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8531) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8532) group_is_overloaded(unsigned int imbalance_pct, struct sg_lb_stats *sgs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8533) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8534) if (sgs->sum_nr_running <= sgs->group_weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8535) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8537) if ((sgs->group_capacity * 100) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8538) (sgs->group_util * imbalance_pct))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8539) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8541) if ((sgs->group_capacity * imbalance_pct) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8542) (sgs->group_runnable * 100))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8543) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8545) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8548) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8549) * group_smaller_min_cpu_capacity: Returns true if sched_group sg has smaller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8550) * per-CPU capacity than sched_group ref.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8551) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8552) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8553) group_smaller_min_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8555) return fits_capacity(sg->sgc->min_capacity, ref->sgc->min_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8558) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8559) * group_smaller_max_cpu_capacity: Returns true if sched_group sg has smaller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8560) * per-CPU capacity_orig than sched_group ref.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8561) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8562) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8563) group_smaller_max_cpu_capacity(struct sched_group *sg, struct sched_group *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8564) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8565) return fits_capacity(sg->sgc->max_capacity, ref->sgc->max_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8567)
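/*
 * Classify a group, checking the most constrained group_type first so
 * that the first matching state wins.
 */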
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8568) static inline enum
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8569) group_type group_classify(unsigned int imbalance_pct,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8570) struct sched_group *group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8571) struct sg_lb_stats *sgs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8572) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8573) if (group_is_overloaded(imbalance_pct, sgs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8574) return group_overloaded;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8576) if (sg_imbalanced(group))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8577) return group_imbalanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8579) if (sgs->group_asym_packing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8580) return group_asym_packing;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8582) if (sgs->group_misfit_task_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8583) return group_misfit_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8585) if (!group_has_capacity(imbalance_pct, sgs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8586) return group_fully_busy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8588) return group_has_spare;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8590)
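/*
 * Try to refresh the blocked load of a NOHZ-idle CPU. Returns true when
 * the rq may still carry stale blocked load, either because it was too
 * early to update it or because blocked load remains after the update.
 */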
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8591) static bool update_nohz_stats(struct rq *rq, bool force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8593) #ifdef CONFIG_NO_HZ_COMMON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8594) unsigned int cpu = rq->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8596) if (!rq->has_blocked_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8597) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8599) if (!cpumask_test_cpu(cpu, nohz.idle_cpus_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8600) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8602) if (!force && !time_after(jiffies, rq->last_blocked_load_update_tick))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8603) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8605) update_blocked_averages(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8607) return rq->has_blocked_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8608) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8609) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8610) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8613) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8614) * update_sg_lb_stats - Update sched_group's statistics for load balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8615) * @env: The load balancing environment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8616) * @group: sched_group whose statistics are to be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8617) * @sgs: variable to hold the statistics for this group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8618) * @sg_status: Holds flags indicating the status of the sched_group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8619) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8620) static inline void update_sg_lb_stats(struct lb_env *env,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8621) struct sched_group *group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8622) struct sg_lb_stats *sgs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8623) int *sg_status)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8624) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8625) int i, nr_running, local_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8627) memset(sgs, 0, sizeof(*sgs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8629) local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8631) for_each_cpu_and(i, sched_group_span(group), env->cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8632) struct rq *rq = cpu_rq(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8634) if ((env->flags & LBF_NOHZ_STATS) && update_nohz_stats(rq, false))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8635) env->flags |= LBF_NOHZ_AGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8637) sgs->group_load += cpu_load(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8638) sgs->group_util += cpu_util(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8639) sgs->group_runnable += cpu_runnable(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8640) sgs->sum_h_nr_running += rq->cfs.h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8642) nr_running = rq->nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8643) sgs->sum_nr_running += nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8645) if (nr_running > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8646) *sg_status |= SG_OVERLOAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8648) if (cpu_overutilized(i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8649) *sg_status |= SG_OVERUTILIZED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8651) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8652) sgs->nr_numa_running += rq->nr_numa_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8653) sgs->nr_preferred_running += rq->nr_preferred_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8654) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8655) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8656) * No need to call idle_cpu() if nr_running is not 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8657) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8658) if (!nr_running && idle_cpu(i)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8659) sgs->idle_cpus++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8660) /* Idle cpu can't have misfit task */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8661) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8662) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8664) if (local_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8665) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8667) /* Check for a misfit task on the cpu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8668) if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8669) sgs->group_misfit_task_load < rq->misfit_task_load) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8670) sgs->group_misfit_task_load = rq->misfit_task_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8671) *sg_status |= SG_OVERLOAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8675) /* Check if dst CPU is idle and preferred to this group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8676) if (env->sd->flags & SD_ASYM_PACKING &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8677) env->idle != CPU_NOT_IDLE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8678) sgs->sum_h_nr_running &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8679) sched_asym_prefer(env->dst_cpu, group->asym_prefer_cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8680) sgs->group_asym_packing = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8683) sgs->group_capacity = group->sgc->capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8685) sgs->group_weight = group->group_weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8687) sgs->group_type = group_classify(env->sd->imbalance_pct, group, sgs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8688)
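/*
 * avg_load below scales group_load by capacity. Purely illustrative
 * numbers: group_load = 2048 on group_capacity = 512 gives
 * avg_load = 2048 * SCHED_CAPACITY_SCALE / 512 = 4096, i.e. four times
 * the load a single reference-capacity CPU can carry.
 */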
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8689) /* Computing avg_load makes sense only when group is overloaded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8690) if (sgs->group_type == group_overloaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8691) sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8692) sgs->group_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8695) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8696) * update_sd_pick_busiest - return 1 on busiest group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8697) * @env: The load balancing environment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8698) * @sds: sched_domain statistics
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8699) * @sg: sched_group candidate to be checked for being the busiest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8700) * @sgs: sched_group statistics
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8701) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8702) * Determine if @sg is a busier group than the previously selected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8703) * busiest group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8704) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8705) * Return: %true if @sg is a busier group than the previously selected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8706) * busiest group. %false otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8707) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8708) static bool update_sd_pick_busiest(struct lb_env *env,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8709) struct sd_lb_stats *sds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8710) struct sched_group *sg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8711) struct sg_lb_stats *sgs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8713) struct sg_lb_stats *busiest = &sds->busiest_stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8715) /* Make sure that there is at least one task to pull */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8716) if (!sgs->sum_h_nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8717) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8719) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8720) * Don't try to pull misfit tasks we can't help.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8721) * We can use max_capacity here as a reduction in capacity on some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8722) * CPUs in the group should either be resolvable internally or be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8723) * covered by avg_load imbalance (eventually).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8724) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8725) if (sgs->group_type == group_misfit_task &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8726) (!group_smaller_max_cpu_capacity(sg, sds->local) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8727) sds->local_stat.group_type != group_has_spare))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8728) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8729)
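/*
 * group_type is ordered by how loaded the group is: has_spare, fully_busy,
 * misfit_task, asym_packing, imbalanced, overloaded (see the decision
 * matrix above find_busiest_group()), so the numeric comparisons below
 * pick the more heavily pressed type first.
 */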
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8730) if (sgs->group_type > busiest->group_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8731) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8733) if (sgs->group_type < busiest->group_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8734) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8735)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8736) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8737) * The candidate and the current busiest group are the same type of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8738) * group. Let's check which one is busier according to the type.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8739) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8741) switch (sgs->group_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8742) case group_overloaded:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8743) /* Select the overloaded group with highest avg_load. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8744) if (sgs->avg_load <= busiest->avg_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8745) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8746) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8748) case group_imbalanced:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8749) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8750) * Select the 1st imbalanced group as we don't have any way to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8751) * choose one over another.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8752) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8753) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8755) case group_asym_packing:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8756) /* Prefer to move work from the lowest-priority CPU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8757) if (sched_asym_prefer(sg->asym_prefer_cpu, sds->busiest->asym_prefer_cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8758) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8759) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8761) case group_misfit_task:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8762) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8763) * If we have more than one misfit sg go with the biggest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8764) * misfit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8765) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8766) if (sgs->group_misfit_task_load < busiest->group_misfit_task_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8767) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8768) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8770) case group_fully_busy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8771) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8772) * Select the fully busy group with highest avg_load. In
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8773) * theory, there is no need to pull tasks from such a group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8774) * because the tasks have all the compute capacity they need,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8775) * but we can still improve the overall throughput by reducing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8776) * contention when accessing shared HW resources.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8777) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8778) * XXX for now avg_load is not computed and always 0 so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8779) * select the 1st one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8780) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8781) if (sgs->avg_load <= busiest->avg_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8782) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8783) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8785) case group_has_spare:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8786) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8787) * Select the non-overloaded group with the lowest number of idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8788) * CPUs and the highest number of running tasks. We could also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8789) * compare spare capacity, which is more stable, but a group with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8790) * less spare capacity may still end up with more idle CPUs, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8791) * means fewer opportunities to pull tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8792) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8793) if (sgs->idle_cpus > busiest->idle_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8794) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8795) else if ((sgs->idle_cpus == busiest->idle_cpus) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8796) (sgs->sum_nr_running <= busiest->sum_nr_running))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8797) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8799) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8802) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8803) * Candidate sg has no more than one task per CPU and has higher
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8804) * per-CPU capacity. Migrating tasks to less capable CPUs may harm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8805) * throughput. Maximize throughput; power/energy consequences are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8806) * considered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8807) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8808) if ((env->sd->flags & SD_ASYM_CPUCAPACITY) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8809) (sgs->group_type <= group_fully_busy) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8810) (group_smaller_min_cpu_capacity(sds->local, sg)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8811) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8813) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8816) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8817) static inline enum fbq_type fbq_classify_group(struct sg_lb_stats *sgs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8818) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8819) if (sgs->sum_h_nr_running > sgs->nr_numa_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8820) return regular;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8821) if (sgs->sum_h_nr_running > sgs->nr_preferred_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8822) return remote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8823) return all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8826) static inline enum fbq_type fbq_classify_rq(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8827) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8828) if (rq->nr_running > rq->nr_numa_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8829) return regular;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8830) if (rq->nr_running > rq->nr_preferred_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8831) return remote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8832) return all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8834) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8835) static inline enum fbq_type fbq_classify_group(struct sg_lb_stats *sgs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8836) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8837) return all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8840) static inline enum fbq_type fbq_classify_rq(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8841) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8842) return regular;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8844) #endif /* CONFIG_NUMA_BALANCING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8847) struct sg_lb_stats;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8849) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8850) * task_running_on_cpu - return 1 if @p is running on @cpu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8851) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8853) static unsigned int task_running_on_cpu(int cpu, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8855) /* Task has no contribution or is new */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8856) if (cpu != task_cpu(p) || !READ_ONCE(p->se.avg.last_update_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8857) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8859) if (task_on_rq_queued(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8860) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8862) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8865) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8866) * idle_cpu_without - would a given CPU be idle without @p?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8867) * @cpu: the processor on which idleness is tested.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8868) * @p: task which should be ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8869) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8870) * Return: 1 if the CPU would be idle. 0 otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8871) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8872) static int idle_cpu_without(int cpu, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8874) struct rq *rq = cpu_rq(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8876) if (rq->curr != rq->idle && rq->curr != p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8877) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8879) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8880) * rq->nr_running can't be used here; an updated version without the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8881) * impact of p on @cpu must be used instead. The updated nr_running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8882) * must be computed and tested before calling idle_cpu_without().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8883) */
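/*
 * update_sg_wakeup_stats() is one such caller: it computes
 * nr_running = rq->nr_running - task_running_on_cpu(cpu, p) and only
 * calls idle_cpu_without() when that count is zero.
 */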
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8885) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8886) if (rq->ttwu_pending)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8887) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8888) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8890) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8893) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8894) * update_sg_wakeup_stats - Update sched_group's statistics for wakeup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8895) * @sd: The sched_domain level to look for idlest group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8896) * @group: sched_group whose statistics are to be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8897) * @sgs: variable to hold the statistics for this group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8898) * @p: The task for which we look for the idlest group/CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8899) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8900) static inline void update_sg_wakeup_stats(struct sched_domain *sd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8901) struct sched_group *group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8902) struct sg_lb_stats *sgs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8903) struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8905) int i, nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8907) memset(sgs, 0, sizeof(*sgs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8909) for_each_cpu(i, sched_group_span(group)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8910) struct rq *rq = cpu_rq(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8911) unsigned int local;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8912)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8913) sgs->group_load += cpu_load_without(rq, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8914) sgs->group_util += cpu_util_without(i, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8915) sgs->group_runnable += cpu_runnable_without(rq, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8916) local = task_running_on_cpu(i, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8917) sgs->sum_h_nr_running += rq->cfs.h_nr_running - local;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8919) nr_running = rq->nr_running - local;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8920) sgs->sum_nr_running += nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8922) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8923) * No need to call idle_cpu_without() if nr_running is not 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8924) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8925) if (!nr_running && idle_cpu_without(i, p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8926) sgs->idle_cpus++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8930) /* Check if task fits in the group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8931) if (sd->flags & SD_ASYM_CPUCAPACITY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8932) !task_fits_capacity(p, group->sgc->max_capacity)) {
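/*
 * Any non-zero value is enough here: on this path group_classify()
 * only tests group_misfit_task_load for being non-zero.
 */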
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8933) sgs->group_misfit_task_load = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8936) sgs->group_capacity = group->sgc->capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8938) sgs->group_weight = group->group_weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8940) sgs->group_type = group_classify(sd->imbalance_pct, group, sgs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8943) * Computing avg_load makes sense only when group is fully busy or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8944) * overloaded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8945) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8946) if (sgs->group_type == group_fully_busy ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8947) sgs->group_type == group_overloaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8948) sgs->avg_load = (sgs->group_load * SCHED_CAPACITY_SCALE) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8949) sgs->group_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8951)
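/*
 * update_pick_idlest - return true if @group (with stats @sgs) looks idler
 * than the current @idlest candidate, in which case the caller replaces
 * its pick with @group.
 */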
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8952) static bool update_pick_idlest(struct sched_group *idlest,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8953) struct sg_lb_stats *idlest_sgs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8954) struct sched_group *group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8955) struct sg_lb_stats *sgs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8956) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8957) if (sgs->group_type < idlest_sgs->group_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8958) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8960) if (sgs->group_type > idlest_sgs->group_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8961) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8963) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8964) * The candidate and the current idlest group are the same type of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8965) * group. Let's check which one is the idlest according to the type.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8966) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8968) switch (sgs->group_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8969) case group_overloaded:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8970) case group_fully_busy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8971) /* Select the group with lowest avg_load. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8972) if (idlest_sgs->avg_load <= sgs->avg_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8973) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8974) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8976) case group_imbalanced:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8977) case group_asym_packing:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8978) /* Those types are not used in the slow wakeup path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8979) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8980)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8981) case group_misfit_task:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8982) /* Select group with the highest max capacity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8983) if (idlest->sgc->max_capacity >= group->sgc->max_capacity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8984) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8985) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8987) case group_has_spare:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8988) /* Select group with most idle CPUs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8989) if (idlest_sgs->idle_cpus > sgs->idle_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8990) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8992) /* Select group with lowest group_util */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8993) if (idlest_sgs->idle_cpus == sgs->idle_cpus &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8994) idlest_sgs->group_util <= sgs->group_util)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8995) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8997) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9000) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9003) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9004) * find_idlest_group() finds and returns the least busy CPU group within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9005) * domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9006) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9007) * Assumes p is allowed on at least one CPU in sd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9008) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9009) static struct sched_group *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9010) find_idlest_group(struct sched_domain *sd, struct task_struct *p, int this_cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9011) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9012) struct sched_group *idlest = NULL, *local = NULL, *group = sd->groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9013) struct sg_lb_stats local_sgs, tmp_sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9014) struct sg_lb_stats *sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9015) unsigned long imbalance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9016) struct sg_lb_stats idlest_sgs = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9017) .avg_load = UINT_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9018) .group_type = group_overloaded,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9019) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9021) imbalance = scale_load_down(NICE_0_LOAD) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9022) (sd->imbalance_pct-100) / 100;
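/*
 * For example, with an illustrative imbalance_pct of 125 this allows
 * scale_load_down(NICE_0_LOAD) * 25 / 100 = 256 of slack, i.e. roughly a
 * quarter of one nice-0 task's load, before a remote group is preferred
 * over the local one.
 */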
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9024) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9025) int local_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9026)
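/*
 * Rockchip performance tuning: in the HIGH performance level, while the
 * root domain is not overutilized and the task is allowed on a cpub_mask
 * CPU (typically the big cluster), skip candidate groups that contain no
 * such CPU.
 */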
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9027) if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9028) struct root_domain *rd = cpu_rq(this_cpu)->rd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9029) struct cpumask *cpub_mask = rockchip_perf_get_cpub_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9030) int level = rockchip_perf_get_level();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9031)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9032) if ((level == ROCKCHIP_PERFORMANCE_HIGH) && !READ_ONCE(rd->overutilized) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9033) cpub_mask && cpumask_intersects(p->cpus_ptr, cpub_mask) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9034) !cpumask_intersects(sched_group_span(group), cpub_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9035) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9037)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9038) /* Skip over this group if it has no CPUs allowed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9039) if (!cpumask_intersects(sched_group_span(group),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9040) p->cpus_ptr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9041) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9043) local_group = cpumask_test_cpu(this_cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9044) sched_group_span(group));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9046) if (local_group) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9047) sgs = &local_sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9048) local = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9049) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9050) sgs = &tmp_sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9051) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9053) update_sg_wakeup_stats(sd, group, sgs, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9055) if (!local_group && update_pick_idlest(idlest, &idlest_sgs, group, sgs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9056) idlest = group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9057) idlest_sgs = *sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9060) } while (group = group->next, group != sd->groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9063) /* There is no idlest group to push tasks to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9064) if (!idlest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9065) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9067) /* The local group has been skipped because of CPU affinity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9068) if (!local)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9069) return idlest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9071) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9072) * If the local group is idler than the selected idlest group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9073) * don't try and push the task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9074) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9075) if (local_sgs.group_type < idlest_sgs.group_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9076) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9078) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9079) * If the local group is busier than the selected idlest group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9080) * try and push the task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9081) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9082) if (local_sgs.group_type > idlest_sgs.group_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9083) return idlest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9085) switch (local_sgs.group_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9086) case group_overloaded:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9087) case group_fully_busy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9088) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9089) * When comparing groups across NUMA domains, it's possible for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9090) * the local domain to be very lightly loaded relative to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9091) * remote domains but "imbalance" skews the comparison making
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9092) * remote CPUs look much more favourable. When considering
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9093) * cross-domain, add imbalance to the load on the remote node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9094) * and consider staying local.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9097) if ((sd->flags & SD_NUMA) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9098) ((idlest_sgs.avg_load + imbalance) >= local_sgs.avg_load))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9099) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9101) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9102) * If the local group is less loaded than the selected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9103) * idlest group don't try and push any tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9105) if (idlest_sgs.avg_load >= (local_sgs.avg_load + imbalance))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9106) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9108) if (100 * local_sgs.avg_load <= sd->imbalance_pct * idlest_sgs.avg_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9109) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9110) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9112) case group_imbalanced:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9113) case group_asym_packing:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9114) /* Those types are not used in the slow wakeup path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9115) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9117) case group_misfit_task:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9118) /* Select group with the highest max capacity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9119) if (local->sgc->max_capacity >= idlest->sgc->max_capacity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9120) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9121) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9123) case group_has_spare:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9124) if (sd->flags & SD_NUMA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9125) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9126) int idlest_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9127) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9128) * If there is spare capacity at NUMA, try to select
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9129) * the preferred node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9130) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9131) if (cpu_to_node(this_cpu) == p->numa_preferred_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9132) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9134) idlest_cpu = cpumask_first(sched_group_span(idlest));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9135) if (cpu_to_node(idlest_cpu) == p->numa_preferred_nid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9136) return idlest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9137) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9138) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9139) * Otherwise, keep the task on this node to stay close
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9140) * to its wakeup source and improve locality. If there is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9141) * a real need of migration, periodic load balance will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9142) * take care of it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9143) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9144) if (local_sgs.idle_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9145) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9148) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9149) * Select the group with the highest number of idle CPUs. We could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9150) * also compare the utilization, which is more stable, but a group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9151) * with less spare capacity may still end up with more idle CPUs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9152) * which means more opportunity to run tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9154) if (local_sgs.idle_cpus >= idlest_sgs.idle_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9155) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9156) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9159) return idlest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9162) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9163) * update_sd_lb_stats - Update sched_domain's statistics for load balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9164) * @env: The load balancing environment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9165) * @sds: variable to hold the statistics for this sched_domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9166) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9168) static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9170) struct sched_domain *child = env->sd->child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9171) struct sched_group *sg = env->sd->groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9172) struct sg_lb_stats *local = &sds->local_stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9173) struct sg_lb_stats tmp_sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9174) int sg_status = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9176) #ifdef CONFIG_NO_HZ_COMMON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9177) if (env->idle == CPU_NEWLY_IDLE && READ_ONCE(nohz.has_blocked))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9178) env->flags |= LBF_NOHZ_STATS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9179) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9181) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9182) struct sg_lb_stats *sgs = &tmp_sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9183) int local_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9185) local_group = cpumask_test_cpu(env->dst_cpu, sched_group_span(sg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9186) if (local_group) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9187) sds->local = sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9188) sgs = local;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9190) if (env->idle != CPU_NEWLY_IDLE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9191) time_after_eq(jiffies, sg->sgc->next_update))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9192) update_group_capacity(env->sd, env->dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9195) update_sg_lb_stats(env, sg, sgs, &sg_status);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9197) if (local_group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9198) goto next_group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9201) if (update_sd_pick_busiest(env, sds, sg, sgs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9202) sds->busiest = sg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9203) sds->busiest_stat = *sgs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9206) next_group:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9207) /* Now, start updating sd_lb_stats */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9208) sds->total_load += sgs->group_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9209) sds->total_capacity += sgs->group_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9211) sg = sg->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9212) } while (sg != env->sd->groups);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9214) /* Tag the domain when its child domain prefers tasks to go to siblings first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9215) sds->prefer_sibling = child && child->flags & SD_PREFER_SIBLING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9217) #ifdef CONFIG_NO_HZ_COMMON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9218) if ((env->flags & LBF_NOHZ_AGAIN) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9219) cpumask_subset(nohz.idle_cpus_mask, sched_domain_span(env->sd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9221) WRITE_ONCE(nohz.next_blocked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9222) jiffies + msecs_to_jiffies(LOAD_AVG_PERIOD));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9224) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9226) if (env->sd->flags & SD_NUMA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9227) env->fbq_type = fbq_classify_group(&sds->busiest_stat);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9229) if (!env->sd->parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9230) struct root_domain *rd = env->dst_rq->rd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9232) /* update overload indicator if we are at root domain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9233) WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9235) /* Update over-utilization (tipping point, U >= 0) indicator */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9236) WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9237) trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9238) } else if (sg_status & SG_OVERUTILIZED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9239) struct root_domain *rd = env->dst_rq->rd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9241) WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9242) trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9245)
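/*
 * For example, while the busiest NUMA group runs at most two tasks the
 * computed imbalance is discarded (0 is returned), so a pair of
 * communicating tasks is left on its local node.
 */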
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9246) static inline long adjust_numa_imbalance(int imbalance, int nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9247) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9248) unsigned int imbalance_min;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9250) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9251) * Allow a small imbalance based on a simple pair of communicating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9252) * tasks that remain local when the source domain is almost idle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9253) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9254) imbalance_min = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9255) if (nr_running <= imbalance_min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9256) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9258) return imbalance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9261) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9262) * calculate_imbalance - Calculate the amount of imbalance present within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9263) * groups of a given sched_domain during load balance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9264) * @env: load balance environment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9265) * @sds: statistics of the sched_domain whose imbalance is to be calculated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9266) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9267) static inline void calculate_imbalance(struct lb_env *env, struct sd_lb_stats *sds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9268) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9269) struct sg_lb_stats *local, *busiest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9271) local = &sds->local_stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9272) busiest = &sds->busiest_stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9274) if (busiest->group_type == group_misfit_task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9275) /* Set imbalance to allow misfit tasks to be balanced. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9276) env->migration_type = migrate_misfit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9277) env->imbalance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9278) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9281) if (busiest->group_type == group_asym_packing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9282) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9283) * In case of asym packing, we will try to migrate all load to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9284) * the preferred CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9286) env->migration_type = migrate_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9287) env->imbalance = busiest->sum_h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9288) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9291) if (busiest->group_type == group_imbalanced) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9292) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9293) * In the group_imb case we cannot rely on group-wide averages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9294) * to ensure CPU-load equilibrium; try to move any task to fix
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9295) * the imbalance. The next load balance will take care of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9296) * balancing back the system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9297) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9298) env->migration_type = migrate_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9299) env->imbalance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9300) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9303) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9304) * Try to use spare capacity of local group without overloading it or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9305) * emptying busiest.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9306) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9307) if (local->group_type == group_has_spare) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9308) if ((busiest->group_type > group_fully_busy) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9309) !(env->sd->flags & SD_SHARE_PKG_RESOURCES)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9310) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9311) * If busiest is overloaded, try to fill spare
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9312) * capacity. This might end up creating spare capacity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9313) * in busiest or busiest still being overloaded but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9314) * there is no simple way to directly compute the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9315) * amount of load to migrate in order to balance the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9316) * system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9317) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9318) env->migration_type = migrate_util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9319) env->imbalance = max(local->group_capacity, local->group_util) -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9320) local->group_util;
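/*
 * Purely illustrative numbers: group_capacity = 1024 and
 * group_util = 768 leave an imbalance of 256 worth of utilization to
 * pull; if group_util already exceeds group_capacity, the max() above
 * clamps the imbalance to 0.
 */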
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9322) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9323) * In some cases, the group's utilization is at max or even
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9324) * higher than its capacity because of migrations, but the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9325) * local CPU is (newly) idle. There is at least one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9326) * waiting task in this overloaded busiest group. Let's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9327) * try to pull it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9328) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9329) if (env->idle != CPU_NOT_IDLE && env->imbalance == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9330) env->migration_type = migrate_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9331) env->imbalance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9334) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9337) if (busiest->group_weight == 1 || sds->prefer_sibling) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9338) unsigned int nr_diff = busiest->sum_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9339) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9340) * When prefer_sibling is set, evenly spread the running tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9341) * between the groups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9342) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9343) env->migration_type = migrate_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9344) lsub_positive(&nr_diff, local->sum_nr_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9345) env->imbalance = nr_diff >> 1;
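/*
 * Purely illustrative numbers: busiest->sum_nr_running = 6 and
 * local->sum_nr_running = 2 give nr_diff = 4, so we aim to move
 * 2 tasks and meet in the middle.
 */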
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9346) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9348) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9349) * If there is no overload, we just want to even out the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9350) * of idle CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9351) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9352) env->migration_type = migrate_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9353) env->imbalance = max_t(long, 0, (local->idle_cpus -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9354) busiest->idle_cpus) >> 1);
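/*
 * Purely illustrative numbers: 4 idle CPUs locally vs. 0 in busiest
 * gives an imbalance of (4 - 0) >> 1 = 2 tasks to move towards the
 * local group.
 */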
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9357) /* Consider allowing a small imbalance between NUMA groups */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9358) if (env->sd->flags & SD_NUMA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9359) env->imbalance = adjust_numa_imbalance(env->imbalance,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9360) busiest->sum_nr_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9362) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9365) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9366) * Local is fully busy but has to take more load to relieve the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9367) * busiest group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9368) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9369) if (local->group_type < group_overloaded) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9370) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9371) * Local will become overloaded so the avg_load metrics are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9372) * finally needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9373) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9375) local->avg_load = (local->group_load * SCHED_CAPACITY_SCALE) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9376) local->group_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9378) sds->avg_load = (sds->total_load * SCHED_CAPACITY_SCALE) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9379) sds->total_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9380) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9381) * If the local group is more loaded than the selected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9382) * busiest group don't try to pull any tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9383) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9384) if (local->avg_load >= busiest->avg_load) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9385) env->imbalance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9386) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9390) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9391) * Both groups are or will become overloaded and we're trying to get all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9392) * the CPUs to the average_load, so we don't want to push ourselves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9393) * above the average load, nor do we wish to reduce the max loaded CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9394) * below the average load. At the same time, we also don't want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9395) * reduce the group load below the group capacity. Thus we look for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9396) * the minimum possible imbalance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9397) */
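/*
 * Purely illustrative numbers: busiest->avg_load = 1536,
 * local->avg_load = 512 and sds->avg_load = 1024 with both capacities
 * at 1024: either side of the min() yields (512 * 1024) / 1024 = 512
 * worth of load to migrate.
 */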
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9398) env->migration_type = migrate_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9399) env->imbalance = min(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9400) (busiest->avg_load - sds->avg_load) * busiest->group_capacity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9401) (sds->avg_load - local->avg_load) * local->group_capacity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9402) ) / SCHED_CAPACITY_SCALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9405) /******* find_busiest_group() helpers end here *********************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9407) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9408) * Decision matrix according to the local and busiest group type:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9409) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9410) * busiest \ local has_spare fully_busy misfit asym imbalanced overloaded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9411) * has_spare nr_idle balanced N/A N/A balanced balanced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9412) * fully_busy nr_idle nr_idle N/A N/A balanced balanced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9413) * misfit_task force N/A N/A N/A force force
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9414) * asym_packing force force N/A N/A force force
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9415) * imbalanced force force N/A N/A force force
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9416) * overloaded force force N/A N/A force avg_load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9417) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9418) * N/A : Not Applicable because already filtered while updating
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9419) * statistics.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9420) * balanced : The system is balanced for these 2 groups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9421) * force : Calculate the imbalance as load migration is probably needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9422) * avg_load : Only if imbalance is significant enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9423) * nr_idle : dst_cpu is not busy and the number of idle CPUs differs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9424) * significantly between the two groups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9425) */
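/*
 * Example reading of the matrix above (for illustration): a busiest group
 * that is overloaded while the local group merely has_spare maps to "force",
 * so calculate_imbalance() runs unconditionally; when both groups are
 * overloaded, the "avg_load" entry applies and migration only happens if the
 * avg_load gap is significant (see the imbalance_pct check below).
 */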
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9427) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9428) * find_busiest_group - Returns the busiest group within the sched_domain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9429) * if there is an imbalance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9430) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9431) * Also calculates the amount of runnable load which should be moved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9432) * to restore balance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9433) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9434) * @env: The load balancing environment.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9435) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9436) * Return: - The busiest group if an imbalance exists, NULL otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9437) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9438) static struct sched_group *find_busiest_group(struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9439) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9440) struct sg_lb_stats *local, *busiest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9441) struct sd_lb_stats sds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9443) init_sd_lb_stats(&sds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9445) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9446) * Compute the various statistics relevant for load balancing at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9447) * this level.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9448) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9449) update_sd_lb_stats(env, &sds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9451) if (sched_energy_enabled()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9452) struct root_domain *rd = env->dst_rq->rd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9453) int out_balance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9455) trace_android_rvh_find_busiest_group(sds.busiest, env->dst_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9456) &out_balance);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9457) if (rcu_dereference(rd->pd) && !READ_ONCE(rd->overutilized)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9458) && out_balance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9459) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9462) local = &sds.local_stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9463) busiest = &sds.busiest_stat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9465) /* There is no busy sibling group to pull tasks from */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9466) if (!sds.busiest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9467) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9469) /* Misfit tasks should be dealt with regardless of the avg load */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9470) if (busiest->group_type == group_misfit_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9471) goto force_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9473) /* ASYM feature bypasses nice load balance check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9474) if (busiest->group_type == group_asym_packing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9475) goto force_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9477) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9478) * If the busiest group is imbalanced the below checks don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9479) * work because they assume all things are equal, which typically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9480) * isn't true due to cpus_ptr constraints and the like.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9481) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9482) if (busiest->group_type == group_imbalanced)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9483) goto force_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9485) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9486) * If the local group is busier than the selected busiest group,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9487) * don't try to pull any tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9488) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9489) if (local->group_type > busiest->group_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9490) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9492) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9493) * When groups are overloaded, use the avg_load to ensure fairness
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9494) * between tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9495) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9496) if (local->group_type == group_overloaded) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9497) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9498) * If the local group is more loaded than the selected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9499) * busiest group don't try to pull any tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9500) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9501) if (local->avg_load >= busiest->avg_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9502) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9504) /* XXX broken for overlapping NUMA groups */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9505) sds.avg_load = (sds.total_load * SCHED_CAPACITY_SCALE) /
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9506) sds.total_capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9508) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9509) * Don't pull any tasks if this group is already above the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9510) * domain average load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9511) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9512) if (local->avg_load >= sds.avg_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9513) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9515) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9516) * If the busiest group is more loaded, use imbalance_pct to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9517) * conservative.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9518) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9519) if (100 * busiest->avg_load <=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9520) env->sd->imbalance_pct * local->avg_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9521) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9522) }
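/*
 * Worked example of the conservative check above (assumed values): with an
 * imbalance_pct of 117, pulling is only allowed when
 * 100 * busiest->avg_load > 117 * local->avg_load, i.e. the busiest group
 * must be more than ~17% more loaded than the already overloaded local
 * group; smaller gaps are treated as balanced to avoid bouncing load back
 * and forth.
 */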
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9524) /* Try to move all excess tasks to child's sibling domain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9525) if (sds.prefer_sibling && local->group_type == group_has_spare &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9526) busiest->sum_nr_running > local->sum_nr_running + 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9527) goto force_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9529) if (busiest->group_type != group_overloaded) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9530) if (env->idle == CPU_NOT_IDLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9531) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9532) * If the busiest group is not overloaded (and as a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9533) * result the local one too) but this CPU is already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9534) * busy, let another idle CPU try to pull tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9535) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9536) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9538) if (busiest->group_weight > 1 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9539) local->idle_cpus <= (busiest->idle_cpus + 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9540) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9541) * If the busiest group is not overloaded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9542) * and there is no imbalance between this and busiest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9543) * group wrt idle CPUs, it is balanced. The imbalance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9544) * becomes significant if the diff is greater than 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9545) * otherwise we might end up just moving the imbalance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9546) * to another group. Of course this applies only if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9547) * there is more than 1 CPU per group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9548) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9549) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9551) if (busiest->sum_h_nr_running == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9552) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9553) * busiest doesn't have any tasks waiting to run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9554) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9555) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9556) }
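/*
 * Illustration of the idle-CPU rule above (made-up numbers): with
 * local->idle_cpus = 2 and busiest->idle_cpus = 1 the difference is only 1,
 * so the pair is left alone; with local->idle_cpus = 3 and
 * busiest->idle_cpus = 1 the check falls through and a balance attempt may
 * follow, since pulling one task no longer merely shifts the imbalance onto
 * the other group.
 */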
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9558) force_balance:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9559) /* Looks like there is an imbalance. Compute it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9560) calculate_imbalance(env, &sds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9561) return env->imbalance ? sds.busiest : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9563) out_balanced:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9564) env->imbalance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9565) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9568) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9569) * find_busiest_queue - find the busiest runqueue among the CPUs in the group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9571) static struct rq *find_busiest_queue(struct lb_env *env,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9572) struct sched_group *group)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9574) struct rq *busiest = NULL, *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9575) unsigned long busiest_util = 0, busiest_load = 0, busiest_capacity = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9576) unsigned int busiest_nr = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9577) int i, done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9579) trace_android_rvh_find_busiest_queue(env->dst_cpu, group, env->cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9580) &busiest, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9581) if (done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9582) return busiest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9584) for_each_cpu_and(i, sched_group_span(group), env->cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9585) unsigned long capacity, load, util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9586) unsigned int nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9587) enum fbq_type rt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9589) rq = cpu_rq(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9590) rt = fbq_classify_rq(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9592) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9593) * We classify groups/runqueues into three groups:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9594) * - regular: there are !numa tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9595) * - remote: there are numa tasks that run on the 'wrong' node
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9596) * - all: there is no distinction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9597) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9598) * In order to avoid migrating ideally placed numa tasks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9599) * ignore those when there are better options.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9600) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9601) * If we ignore the actual busiest queue to migrate another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9602) * task, the next balance pass can still reduce the busiest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9603) * queue by moving tasks around inside the node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9604) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9605) * If we cannot move enough load due to this classification
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9606) * the next pass will adjust the group classification and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9607) * allow migration of more tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9608) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9609) * Both cases only affect the total convergence complexity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9610) */
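/*
 * Added note: fbq_type is ordered regular < remote < all (see
 * fbq_classify_rq() earlier in this file). With env->fbq_type == regular
 * the check below skips runqueues holding only NUMA tasks, and with
 * env->fbq_type == remote it skips runqueues whose NUMA tasks are already
 * on their preferred node, so well-placed tasks are not disturbed while
 * better candidates exist.
 */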
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9611) if (rt > env->fbq_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9612) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9614) capacity = capacity_of(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9615) nr_running = rq->cfs.h_nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9618) * For ASYM_CPUCAPACITY domains, don't pick a CPU that could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9619) * eventually lead to active balancing from a higher- to a lower-capacity CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9620) * Higher per-CPU capacity is considered better than balancing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9621) * average load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9622) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9623) if (env->sd->flags & SD_ASYM_CPUCAPACITY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9624) capacity_of(env->dst_cpu) < capacity &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9625) nr_running == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9626) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9628) switch (env->migration_type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9629) case migrate_load:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9630) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9631) * When comparing with load imbalance, use cpu_load()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9632) * which is not scaled with the CPU capacity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9633) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9634) load = cpu_load(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9636) if (nr_running == 1 && load > env->imbalance &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9637) !check_cpu_capacity(rq, env->sd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9638) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9640) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9641) * For the load comparisons with the other CPUs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9642) * consider the cpu_load() scaled with the CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9643) * capacity, so that the load can be moved away
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9644) * from the CPU that is potentially running at a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9645) * lower capacity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9646) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9647) * Thus we're looking for max(load_i / capacity_i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9648) * cross-multiplying to avoid the division, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9649) * works out to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9650) * load_i * capacity_j > load_j * capacity_i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9651) * where j is our previous maximum.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9652) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9653) if (load * busiest_capacity > busiest_load * capacity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9654) busiest_load = load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9655) busiest_capacity = capacity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9656) busiest = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9657) }
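/*
 * Worked example of the comparison above (illustrative numbers): with the
 * current candidate at busiest_load = 600 on busiest_capacity = 1024 and a
 * new CPU at load = 400 on capacity = 512, we compare
 * 400 * 1024 = 409600 against 600 * 512 = 307200; the new CPU wins because
 * its load relative to capacity (400/512) exceeds the previous maximum
 * (600/1024), without doing any division.
 */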
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9658) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9660) case migrate_util:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9661) util = cpu_util(cpu_of(rq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9663) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9664) * Don't try to pull utilization from a CPU with one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9665) * running task. Whatever its utilization, we will fail to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9666) * detach the task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9667) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9668) if (nr_running <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9669) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9671) if (busiest_util < util) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9672) busiest_util = util;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9673) busiest = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9675) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9677) case migrate_task:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9678) if (busiest_nr < nr_running) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9679) busiest_nr = nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9680) busiest = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9682) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9684) case migrate_misfit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9685) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9686) * For ASYM_CPUCAPACITY domains with misfit tasks we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9687) * simply seek the "biggest" misfit task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9688) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9689) if (rq->misfit_task_load > busiest_load) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9690) busiest_load = rq->misfit_task_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9691) busiest = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9694) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9699) return busiest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9702) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9703) * Max backoff if we encounter pinned tasks. Pretty arbitrary value, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9704) * it doesn't matter much so long as it is large enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9705) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9706) #define MAX_PINNED_INTERVAL 512
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9708) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9709) asym_active_balance(struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9710) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9711) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9712) * ASYM_PACKING needs to force migrate tasks from busy but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9713) * lower priority CPUs in order to pack all tasks in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9714) * highest priority CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9715) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9716) return env->idle != CPU_NOT_IDLE && (env->sd->flags & SD_ASYM_PACKING) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9717) sched_asym_prefer(env->dst_cpu, env->src_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9720) static inline bool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9721) voluntary_active_balance(struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9722) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9723) struct sched_domain *sd = env->sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9725) if (asym_active_balance(env))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9726) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9728) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9729) * The dst_cpu is idle and the src_cpu has only 1 CFS task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9730) * It's worth migrating the task if the src_cpu's capacity is reduced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9731) * by other sched_class activity or IRQs, while more capacity stays
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9732) * available on dst_cpu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9733) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9734) if ((env->idle != CPU_NOT_IDLE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9735) (env->src_rq->cfs.h_nr_running == 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9736) if ((check_cpu_capacity(env->src_rq, sd)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9737) (capacity_of(env->src_cpu)*sd->imbalance_pct < capacity_of(env->dst_cpu)*100))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9738) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9739) }
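/*
 * Illustrative numbers for the check above (assumptions only): with
 * imbalance_pct = 117, capacity_of(src_cpu) = 400 and
 * capacity_of(dst_cpu) = 512, 400 * 117 = 46800 < 512 * 100 = 51200, so the
 * lone CFS task is worth pushing to the idle dst_cpu, provided
 * check_cpu_capacity() also reports the source capacity as reduced.
 */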
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9741) if (env->migration_type == migrate_misfit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9742) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9744) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9745) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9747) static int need_active_balance(struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9748) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9749) struct sched_domain *sd = env->sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9751) if (voluntary_active_balance(env))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9752) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9754) return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9757) static int active_load_balance_cpu_stop(void *data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9759) static int should_we_balance(struct lb_env *env)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9760) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9761) struct sched_group *sg = env->sd->groups;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9762) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9764) if (IS_ENABLED(CONFIG_ROCKCHIP_PERFORMANCE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9765) struct root_domain *rd = env->dst_rq->rd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9766) struct cpumask *cpul_mask = rockchip_perf_get_cpul_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9767) int level = rockchip_perf_get_level();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9769) if ((level == ROCKCHIP_PERFORMANCE_HIGH) && !READ_ONCE(rd->overutilized) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9770) cpul_mask && cpumask_test_cpu(env->dst_cpu, cpul_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9771) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9774) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9775) * Ensure the balancing environment is consistent; can happen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9776) * when the softirq triggers 'during' hotplug.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9777) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9778) if (!cpumask_test_cpu(env->dst_cpu, env->cpus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9779) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9781) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9782) * In the newly idle case, we will allow all the CPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9783) * to do the newly idle load balance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9784) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9785) if (env->idle == CPU_NEWLY_IDLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9786) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9788) /* Try to find first idle CPU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9789) for_each_cpu_and(cpu, group_balance_mask(sg), env->cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9790) if (!idle_cpu(cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9791) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9793) /* Are we the first idle CPU? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9794) return cpu == env->dst_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9797) /* Are we the first CPU of this group ? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9798) return group_balance_cpu(sg) == env->dst_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9799) }
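/*
 * Example of the selection above (illustration only): if the group balance
 * mask spans CPUs 0-3 and CPUs 1 and 3 are idle, only CPU 1 (the first idle
 * CPU found) returns 1 here; with no idle CPU, only group_balance_cpu(sg)
 * does. This keeps one CPU per group performing the periodic balance rather
 * than all of them racing for it.
 */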
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9801) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9802) * Check this_cpu to ensure it is balanced within domain. Attempt to move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9803) * tasks if there is an imbalance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9804) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9805) static int load_balance(int this_cpu, struct rq *this_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9806) struct sched_domain *sd, enum cpu_idle_type idle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9807) int *continue_balancing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9808) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9809) int ld_moved, cur_ld_moved, active_balance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9810) struct sched_domain *sd_parent = sd->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9811) struct sched_group *group;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9812) struct rq *busiest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9813) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9814) struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9816) struct lb_env env = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9817) .sd = sd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9818) .dst_cpu = this_cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9819) .dst_rq = this_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9820) .dst_grpmask = sched_group_span(sd->groups),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9821) .idle = idle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9822) .loop_break = sched_nr_migrate_break,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9823) .cpus = cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9824) .fbq_type = all,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9825) .tasks = LIST_HEAD_INIT(env.tasks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9826) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9828) cpumask_and(cpus, sched_domain_span(sd), cpu_active_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9830) schedstat_inc(sd->lb_count[idle]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9832) redo:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9833) if (!should_we_balance(&env)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9834) *continue_balancing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9835) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9838) group = find_busiest_group(&env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9839) if (!group) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9840) schedstat_inc(sd->lb_nobusyg[idle]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9841) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9842) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9844) busiest = find_busiest_queue(&env, group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9845) if (!busiest) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9846) schedstat_inc(sd->lb_nobusyq[idle]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9847) goto out_balanced;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9850) BUG_ON(busiest == env.dst_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9852) schedstat_add(sd->lb_imbalance[idle], env.imbalance);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9854) env.src_cpu = busiest->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9855) env.src_rq = busiest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9857) ld_moved = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9858) if (busiest->nr_running > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9859) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9860) * Attempt to move tasks. If find_busiest_group has found
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9861) * an imbalance but busiest->nr_running <= 1, the group is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9862) * still unbalanced. ld_moved simply stays zero, so it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9863) * correctly treated as an imbalance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9864) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9865) env.flags |= LBF_ALL_PINNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9866) env.loop_max = min(sysctl_sched_nr_migrate, busiest->nr_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9868) more_balance:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9869) rq_lock_irqsave(busiest, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9870) env.src_rq_rf = &rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9871) update_rq_clock(busiest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9873) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9874) * cur_ld_moved - load moved in current iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9875) * ld_moved - cumulative load moved across iterations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9876) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9877) cur_ld_moved = detach_tasks(&env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9879) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9880) * We've detached some tasks from busiest_rq. Every
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9881) * task is masked "TASK_ON_RQ_MIGRATING", so we can safely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9882) * unlock busiest->lock and be sure that nobody can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9883) * manipulate the tasks in parallel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9884) * See task_rq_lock() family for the details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9885) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9887) rq_unlock(busiest, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9889) if (cur_ld_moved) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9890) attach_tasks(&env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9891) ld_moved += cur_ld_moved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9894) local_irq_restore(rf.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9896) if (env.flags & LBF_NEED_BREAK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9897) env.flags &= ~LBF_NEED_BREAK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9898) goto more_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9901) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9902) * Revisit (affine) tasks on src_cpu that couldn't be moved to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9903) * us and move them to an alternate dst_cpu in our sched_group
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9904) * where they can run. The upper limit on how many times we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9905) * iterate on same src_cpu is dependent on number of CPUs in our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9906) * sched_group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9907) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9908) * This changes load balance semantics a bit on who can move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9909) * load to a given_cpu. In addition to the given_cpu itself
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9910) * (or an ilb_cpu acting on its behalf where given_cpu is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9911) * nohz-idle), we now have balance_cpu in a position to move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9912) * load to given_cpu. In rare situations, this may cause
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9913) * conflicts (balance_cpu and given_cpu/ilb_cpu deciding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9914) * _independently_ and at _same_ time to move some load to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9915) * given_cpu) causing excess load to be moved to given_cpu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9916) * This, however, should rarely happen in practice and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9917) * moreover subsequent load balance cycles should correct the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9918) * excess load moved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9919) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9920) if ((env.flags & LBF_DST_PINNED) && env.imbalance > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9922) /* Prevent dst_cpu from being re-selected via env's CPUs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9923) __cpumask_clear_cpu(env.dst_cpu, env.cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9925) env.dst_rq = cpu_rq(env.new_dst_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9926) env.dst_cpu = env.new_dst_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9927) env.flags &= ~LBF_DST_PINNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9928) env.loop = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9929) env.loop_break = sched_nr_migrate_break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9931) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9932) * Go back to "more_balance" rather than "redo" since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9933) * need to continue with same src_cpu.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9934) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9935) goto more_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9936) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9938) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9939) * We failed to reach balance because of affinity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9940) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9941) if (sd_parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9942) int *group_imbalance = &sd_parent->groups->sgc->imbalance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9944) if ((env.flags & LBF_SOME_PINNED) && env.imbalance > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9945) *group_imbalance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9948) /* All tasks on this runqueue were pinned by CPU affinity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9949) if (unlikely(env.flags & LBF_ALL_PINNED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9950) __cpumask_clear_cpu(cpu_of(busiest), cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9951) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9952) * Attempting to continue load balancing at the current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9953) * sched_domain level only makes sense if there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9954) * active CPUs remaining as possible busiest CPUs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9955) * pull load from which are not contained within the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9956) * destination group that is receiving any migrated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9957) * load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9958) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9959) if (!cpumask_subset(cpus, env.dst_grpmask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9960) env.loop = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9961) env.loop_break = sched_nr_migrate_break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9962) goto redo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9964) goto out_all_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9968) if (!ld_moved) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9969) schedstat_inc(sd->lb_failed[idle]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9970) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9971) * Increment the failure counter only on periodic balance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9972) * We do not want newidle balance, which can be very
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9973) * frequent, pollute the failure counter causing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9974) * excessive cache_hot migrations and active balances.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9975) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9976) if (idle != CPU_NEWLY_IDLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9977) sd->nr_balance_failed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9979) if (need_active_balance(&env)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9980) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9982) raw_spin_lock_irqsave(&busiest->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9984) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9985) * Don't kick the active_load_balance_cpu_stop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9986) * if the curr task on the busiest CPU can't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9987) * moved to this_cpu:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9988) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9989) if (!cpumask_test_cpu(this_cpu, busiest->curr->cpus_ptr)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9990) raw_spin_unlock_irqrestore(&busiest->lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9991) flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9992) env.flags |= LBF_ALL_PINNED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9993) goto out_one_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9994) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9996) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9997) * ->active_balance synchronizes accesses to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9998) * ->active_balance_work. Once set, it's cleared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9999) * only after active load balance is finished.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10001) if (!busiest->active_balance) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10002) busiest->active_balance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10003) busiest->push_cpu = this_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10004) active_balance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10006) raw_spin_unlock_irqrestore(&busiest->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10008) if (active_balance) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10009) stop_one_cpu_nowait(cpu_of(busiest),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10010) active_load_balance_cpu_stop, busiest,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10011) &busiest->active_balance_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10014) /* We've kicked active balancing, force task migration. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10015) sd->nr_balance_failed = sd->cache_nice_tries+1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10017) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10018) sd->nr_balance_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10020) if (likely(!active_balance) || voluntary_active_balance(&env)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10021) /* We were unbalanced, so reset the balancing interval */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10022) sd->balance_interval = sd->min_interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10023) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10025) * If we've begun active balancing, start to back off. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10026) * case may not be covered by the all_pinned logic if there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10027) * is only 1 task on the busy runqueue (because we don't call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10028) * detach_tasks).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10029) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10030) if (sd->balance_interval < sd->max_interval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10031) sd->balance_interval *= 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10032) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10033)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10034) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10036) out_balanced:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10037) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10038) * We reach balance although we may have faced some affinity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10039) * constraints. Clear the imbalance flag only if other tasks got
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10040) * a chance to move and fix the imbalance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10041) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10042) if (sd_parent && !(env.flags & LBF_ALL_PINNED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10043) int *group_imbalance = &sd_parent->groups->sgc->imbalance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10045) if (*group_imbalance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10046) *group_imbalance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10049) out_all_pinned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10050) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10051) * We reach balance because all tasks are pinned at this level so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10052) * we can't migrate them. Leave the imbalance flag set so the parent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10053) * level can try to migrate them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10054) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10055) schedstat_inc(sd->lb_balanced[idle]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10057) sd->nr_balance_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10059) out_one_pinned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10060) ld_moved = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10062) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10063) * newidle_balance() disregards balance intervals, so we could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10064) * repeatedly reach this code, which would lead to balance_interval
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10065) * skyrocketing in a short amount of time. Skip the balance_interval
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10066) * increase logic to avoid that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10067) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10068) if (env.idle == CPU_NEWLY_IDLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10069) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10071) /* tune up the balancing interval */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10072) if ((env.flags & LBF_ALL_PINNED &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10073) sd->balance_interval < MAX_PINNED_INTERVAL) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10074) sd->balance_interval < sd->max_interval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10075) sd->balance_interval *= 2;
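/*
 * Illustration of the backoff above (example values): after repeated
 * failures with pinned tasks, a balance_interval of 8 doubles to 16, 32,
 * ... until it is no longer below MAX_PINNED_INTERVAL (512); without the
 * all-pinned flag, the doubling stops once sd->max_interval is reached.
 */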
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10076) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10077) return ld_moved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10078) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10080) static inline unsigned long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10081) get_sd_balance_interval(struct sched_domain *sd, int cpu_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10082) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10083) unsigned long interval = sd->balance_interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10085) if (cpu_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10086) interval *= sd->busy_factor;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10088) /* scale ms to jiffies */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10089) interval = msecs_to_jiffies(interval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10091) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10092) * Reduce likelihood of busy balancing at higher domains racing with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10093) * balancing at lower domains by preventing their balancing periods
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10094) * from being multiples of each other.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10096) if (cpu_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10097) interval -= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10099) interval = clamp(interval, 1UL, max_load_balance_interval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10101) return interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10102) }
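/*
 * Worked example (assumed values): with balance_interval = 64 (ms) and
 * busy_factor = 16, a busy CPU gets 64 * 16 = 1024 ms, converted to jiffies,
 * reduced by 1 so that busy periods at different domain levels do not stay
 * exact multiples of each other, and finally clamped to
 * [1, max_load_balance_interval].
 */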
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10104) static inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10105) update_next_balance(struct sched_domain *sd, unsigned long *next_balance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10106) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10107) unsigned long interval, next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10109) /* used by idle balance, so cpu_busy = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10110) interval = get_sd_balance_interval(sd, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10111) next = sd->last_balance + interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10113) if (time_after(*next_balance, next))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10114) *next_balance = next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10118) * active_load_balance_cpu_stop is run by the CPU stopper. It pushes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10119) * running tasks off the busiest CPU onto idle CPUs. It requires at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10120) * least 1 task to be running on each physical CPU where possible, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10121) * avoids physical / logical imbalances.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10122) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10123) static int active_load_balance_cpu_stop(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10124) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10125) struct rq *busiest_rq = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10126) int busiest_cpu = cpu_of(busiest_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10127) int target_cpu = busiest_rq->push_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10128) struct rq *target_rq = cpu_rq(target_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10129) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10130) struct task_struct *p = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10131) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10133) rq_lock_irq(busiest_rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10134) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10135) * Between queueing the stop-work and running it is a hole in which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10136) * CPUs can become inactive. We should not move tasks from or to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10137) * inactive CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10138) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10139) if (!cpu_active(busiest_cpu) || !cpu_active(target_cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10140) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10142) /* Make sure the requested CPU hasn't gone down in the meantime: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10143) if (unlikely(busiest_cpu != smp_processor_id() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10144) !busiest_rq->active_balance))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10145) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10147) /* Is there any task to move? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10148) if (busiest_rq->nr_running <= 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10149) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10151) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10152) * This condition is "impossible"; if it occurs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10153) * we need to fix it. Originally reported by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10154) * Bjorn Helgaas on a 128-CPU setup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10155) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10156) BUG_ON(busiest_rq == target_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10158) /* Search for an sd spanning us and the target CPU. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10159) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10160) for_each_domain(target_cpu, sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10161) if (cpumask_test_cpu(busiest_cpu, sched_domain_span(sd)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10162) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10165) if (likely(sd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10166) struct lb_env env = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10167) .sd = sd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10168) .dst_cpu = target_cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10169) .dst_rq = target_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10170) .src_cpu = busiest_rq->cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10171) .src_rq = busiest_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10172) .idle = CPU_IDLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10174) * can_migrate_task() doesn't need to compute new_dst_cpu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10175) * for active balancing. Since we have CPU_IDLE, but no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10176) * @dst_grpmask, we need to make that test go away by lying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10177) * about DST_PINNED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10178) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10179) .flags = LBF_DST_PINNED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10180) .src_rq_rf = &rf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10181) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10183) schedstat_inc(sd->alb_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10184) update_rq_clock(busiest_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10186) p = detach_one_task(&env);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10187) if (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10188) schedstat_inc(sd->alb_pushed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10189) /* Active balancing done, reset the failure counter. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10190) sd->nr_balance_failed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10191) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10192) schedstat_inc(sd->alb_failed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10195) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10196) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10197) busiest_rq->active_balance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10198) rq_unlock(busiest_rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10200) if (p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10201) attach_one_task(target_rq, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10203) local_irq_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10205) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10208) static DEFINE_SPINLOCK(balancing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10210) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10211) * Scale the max load_balance interval with the number of CPUs in the system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10212) * This trades load-balance latency on larger machines for less cross talk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10213) */
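/*
 * Worked example (illustrative values only): with HZ=250 and 8 active CPUs,
 * the cap below works out to 250 * 8 / 10 = 200 jiffies, i.e. roughly 800ms.
 */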
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10214) void update_max_interval(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10216) max_load_balance_interval = HZ*num_active_cpus()/10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10219) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10220) * It checks each scheduling domain to see if it is due to be balanced,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10221) * and initiates a balancing operation if so.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10222) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10223) * Balancing parameters are set up in init_sched_domains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10224) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10225) static void rebalance_domains(struct rq *rq, enum cpu_idle_type idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10226) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10227) int continue_balancing = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10228) int cpu = rq->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10229) int busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10230) unsigned long interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10231) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10232) /* Earliest time when we have to do rebalance again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10233) unsigned long next_balance = jiffies + 60*HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10234) int update_next_balance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10235) int need_serialize, need_decay = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10236) u64 max_cost = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10238) trace_android_rvh_sched_rebalance_domains(rq, &continue_balancing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10239) if (!continue_balancing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10240) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10242) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10243) for_each_domain(cpu, sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10244) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10245) * Decay the newidle max times here because this is a regular
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10246) * visit to all the domains. Decay ~1% per second.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10247) */
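/*
 * (253/256 ~= 0.988, so each HZ-spaced decay step below trims a little over
 *  1% off the estimate - arithmetic spelled out for readability.)
 */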
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10248) if (time_after(jiffies, sd->next_decay_max_lb_cost)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10249) sd->max_newidle_lb_cost =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10250) (sd->max_newidle_lb_cost * 253) / 256;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10251) sd->next_decay_max_lb_cost = jiffies + HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10252) need_decay = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10254) max_cost += sd->max_newidle_lb_cost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10256) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10257) * Stop the load balance at this level. There is another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10258) * CPU in our sched group which is doing load balancing more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10259) * actively.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10260) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10261) if (!continue_balancing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10262) if (need_decay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10263) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10264) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10267) interval = get_sd_balance_interval(sd, busy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10268)
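/*
 * SD_SERIALIZE domains (typically the NUMA levels) are balanced one at a
 * time system-wide; if another CPU holds the lock we simply skip this
 * domain for now.
 */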
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10269) need_serialize = sd->flags & SD_SERIALIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10270) if (need_serialize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10271) if (!spin_trylock(&balancing))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10272) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10275) if (time_after_eq(jiffies, sd->last_balance + interval)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10276) if (load_balance(cpu, rq, sd, idle, &continue_balancing)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10277) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10278) * The LBF_DST_PINNED logic could have changed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10279) * env->dst_cpu, so we can't know our idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10280) * state even if we migrated tasks. Update it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10281) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10282) idle = idle_cpu(cpu) ? CPU_IDLE : CPU_NOT_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10283) busy = idle != CPU_IDLE && !sched_idle_cpu(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10285) sd->last_balance = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10286) interval = get_sd_balance_interval(sd, busy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10288) if (need_serialize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10289) spin_unlock(&balancing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10290) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10291) if (time_after(next_balance, sd->last_balance + interval)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10292) next_balance = sd->last_balance + interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10293) update_next_balance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10296) if (need_decay) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10297) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10298) * Ensure the rq-wide value also decays but keep it at a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10299) * reasonable floor to avoid funnies with rq->avg_idle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10300) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10301) rq->max_idle_balance_cost =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10302) max((u64)sysctl_sched_migration_cost, max_cost);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10304) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10306) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10307) * next_balance will be updated only when there is a need.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10308) * When the CPU is attached to a null domain, for example, it will not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10309) * updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10310) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10311) if (likely(update_next_balance)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10312) rq->next_balance = next_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10314) #ifdef CONFIG_NO_HZ_COMMON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10315) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10316) * If this CPU has been elected to perform the nohz idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10317) * balance, other idle CPUs have already rebalanced with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10318) * nohz_idle_balance() and nohz.next_balance has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10319) * updated accordingly. This CPU is now running the idle load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10320) * balance for itself and we need to update the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10321) * nohz.next_balance accordingly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10322) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10323) if ((idle == CPU_IDLE) && time_after(nohz.next_balance, rq->next_balance))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10324) nohz.next_balance = rq->next_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10325) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10329) static inline int on_null_domain(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10331) return unlikely(!rcu_dereference_sched(rq->sd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10334) #ifdef CONFIG_NO_HZ_COMMON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10335) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10336) * idle load balancing details
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10337) * - When one of the busy CPUs notices that there may be an idle rebalancing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10338) *   needed, it will kick the idle load balancer, which then does idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10339) *   load balancing for all the idle CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10340) * - HK_FLAG_MISC CPUs are used for this task, because HK_FLAG_SCHED is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10341) *   set anywhere yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10342) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10344) static inline int find_new_ilb(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10345) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10346) int ilb = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10348) trace_android_rvh_find_new_ilb(nohz.idle_cpus_mask, &ilb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10349) if (ilb >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10350) return ilb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10352) for_each_cpu_and(ilb, nohz.idle_cpus_mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10353) housekeeping_cpumask(HK_FLAG_MISC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10354) if (idle_cpu(ilb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10355) return ilb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10357)
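/* No suitable idle housekeeping CPU found: nr_cpu_ids acts as the sentinel. */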
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10358) return nr_cpu_ids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10361) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10362) * Kick a CPU to do the nohz balancing, if it is time for it. We pick any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10363) * idle CPU in the HK_FLAG_MISC housekeeping set (if there is one).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10364) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10365) static void kick_ilb(unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10367) int ilb_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10369) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10370) * Increase nohz.next_balance only if a full ilb is triggered, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10371) * not if we only update stats.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10372) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10373) if (flags & NOHZ_BALANCE_KICK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10374) nohz.next_balance = jiffies+1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10376) ilb_cpu = find_new_ilb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10378) if (ilb_cpu >= nr_cpu_ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10379) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10381) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10382) * Access to rq::nohz_csd is serialized by NOHZ_KICK_MASK; he who sets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10383) * the first flag owns it; cleared by nohz_csd_func().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10384) */
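/*
 * atomic_fetch_or() returns the previous flag word: if any kick bit was
 * already set, an earlier kick owns the CSD and will service the (now
 * merged) request, so we can bail out below.
 */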
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10385) flags = atomic_fetch_or(flags, nohz_flags(ilb_cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10386) if (flags & NOHZ_KICK_MASK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10387) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10389) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10390) * This way we generate an IPI on the target CPU, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10391) * is idle, and the softirq performing the nohz idle load balance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10392) * will be run before returning from the IPI.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10393) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10394) smp_call_function_single_async(ilb_cpu, &cpu_rq(ilb_cpu)->nohz_csd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10397) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10398) * Current decision point for kicking the idle load balancer in the presence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10399) * of idle CPUs in the system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10400) */
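/*
 * In short, a kick is raised when any of the following holds (mirroring the
 * checks below): this rq has more than one runnable task; a CFS task runs
 * here while the CPU's capacity is reduced; an asym-packing preferred CPU is
 * sitting idle; a misfit task could use a higher-capacity CPU; or more than
 * one CPU is busy within this LLC domain. A stats-only kick is also raised
 * when blocked load is due for an update.
 */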
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10401) static void nohz_balancer_kick(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10402) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10403) unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10404) struct sched_domain_shared *sds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10405) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10406) int nr_busy, i, cpu = rq->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10407) unsigned int flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10408) int done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10410) if (unlikely(rq->idle_balance))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10411) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10413) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10414) * We may have recently been in ticked or tickless idle mode. At the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10415) * first busy tick after returning from idle, we will update the busy stats.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10416) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10417) nohz_balance_exit_idle(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10419) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10420) * No CPUs are in tickless mode and hence there is no need for NOHZ idle load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10421) * balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10423) if (likely(!atomic_read(&nohz.nr_cpus)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10424) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10426) if (READ_ONCE(nohz.has_blocked) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10427) time_after(now, READ_ONCE(nohz.next_blocked)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10428) flags = NOHZ_STATS_KICK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10430) if (time_before(now, nohz.next_balance))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10431) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10433) trace_android_rvh_sched_nohz_balancer_kick(rq, &flags, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10434) if (done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10435) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10437) if (rq->nr_running >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10438) flags = NOHZ_KICK_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10439) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10440) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10442) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10444) sd = rcu_dereference(rq->sd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10445) if (sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10446) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10447) * If there's a CFS task and the current CPU has reduced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10448) * capacity; kick the ILB to see if there's a better CPU to run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10449) * on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10450) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10451) if (rq->cfs.h_nr_running >= 1 && check_cpu_capacity(rq, sd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10452) flags = NOHZ_KICK_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10453) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10454) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10457) sd = rcu_dereference(per_cpu(sd_asym_packing, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10458) if (sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10459) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10460) * When ASYM_PACKING; see if there's a more preferred CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10461) * currently idle; in which case, kick the ILB to move tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10462) * around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10463) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10464) for_each_cpu_and(i, sched_domain_span(sd), nohz.idle_cpus_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10465) if (sched_asym_prefer(i, cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10466) flags = NOHZ_KICK_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10467) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10468) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10472) sd = rcu_dereference(per_cpu(sd_asym_cpucapacity, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10473) if (sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10474) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10475) * When ASYM_CPUCAPACITY; see if there's a higher capacity CPU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10476) * to run the misfit task on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10477) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10478) if (check_misfit_status(rq, sd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10479) flags = NOHZ_KICK_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10480) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10483) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10484) * For asymmetric systems, we do not want to nicely balance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10485) * cache use, instead we want to embrace asymmetry and only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10486) * ensure tasks have enough CPU capacity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10487) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10488) * Skip the LLC logic because it's not relevant in that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10489) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10490) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10493) sds = rcu_dereference(per_cpu(sd_llc_shared, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10494) if (sds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10495) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10496) * If there is an imbalance between LLC domains (IOW we could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10497) * increase the overall cache use), we need some less-loaded LLC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10498) * domain to pull some load. Likewise, we may need to spread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10499) * load within the current LLC domain (e.g. packed SMT cores but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10500) * other CPUs are idle). We can't really know from here how busy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10501) * the others are - so just get a nohz balance going if it looks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10502) * like this LLC domain has tasks we could move.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10503) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10504) nr_busy = atomic_read(&sds->nr_busy_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10505) if (nr_busy > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10506) flags = NOHZ_KICK_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10507) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10510) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10511) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10512) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10513) if (flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10514) kick_ilb(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10517) static void set_cpu_sd_state_busy(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10518) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10519) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10521) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10522) sd = rcu_dereference(per_cpu(sd_llc, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10524) if (!sd || !sd->nohz_idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10525) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10526) sd->nohz_idle = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10528) atomic_inc(&sd->shared->nr_busy_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10529) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10530) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10533) void nohz_balance_exit_idle(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10534) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10535) SCHED_WARN_ON(rq != this_rq());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10537) if (likely(!rq->nohz_tick_stopped))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10538) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10540) rq->nohz_tick_stopped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10541) cpumask_clear_cpu(rq->cpu, nohz.idle_cpus_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10542) atomic_dec(&nohz.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10544) set_cpu_sd_state_busy(rq->cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10545) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10547) static void set_cpu_sd_state_idle(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10548) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10549) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10551) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10552) sd = rcu_dereference(per_cpu(sd_llc, cpu));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10554) if (!sd || sd->nohz_idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10555) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10556) sd->nohz_idle = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10558) atomic_dec(&sd->shared->nr_busy_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10559) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10560) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10563) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10564) * This routine will record that the CPU is going idle with tick stopped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10565) * This info will be used in performing idle load balancing in the future.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10566) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10567) void nohz_balance_enter_idle(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10568) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10569) struct rq *rq = cpu_rq(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10571) SCHED_WARN_ON(cpu != smp_processor_id());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10573) if (!cpu_active(cpu)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10574) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10575) * A CPU can be paused while it is idle with its tick
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10576) * stopped. nohz_balance_exit_idle() should be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10577) * from the local CPU, so it can't be called during
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10578) * pause. This results in the paused CPU participating in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10579) * the nohz idle balance, which should be avoided.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10580) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10581) * When the paused CPU exits idle and enters again,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10582) * exempt the paused CPU from nohz_balance_exit_idle.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10583) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10584) nohz_balance_exit_idle(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10585) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10588) /* Spare idle load balancing on CPUs that don't want to be disturbed: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10589) if (!housekeeping_cpu(cpu, HK_FLAG_SCHED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10590) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10592) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10593) * Can be set safely without rq->lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10594) * If a clear happens, it will have evaluated last additions because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10595) * rq->lock is held during the check and the clear.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10596) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10597) rq->has_blocked_load = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10599) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10600) * The tick is still stopped but load could have been added in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10601) * meantime. We set the nohz.has_blocked flag to trigger a check of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10602) * *_avg. The CPU is already part of nohz.idle_cpus_mask so the clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10603) * of nohz.has_blocked can only happen after checking the new load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10604) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10605) if (rq->nohz_tick_stopped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10606) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10608) /* If we're a completely isolated CPU, we don't play: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10609) if (on_null_domain(rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10610) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10612) rq->nohz_tick_stopped = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10614) cpumask_set_cpu(cpu, nohz.idle_cpus_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10615) atomic_inc(&nohz.nr_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10618) * Ensures that if nohz_idle_balance() fails to observe our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10619) * @idle_cpus_mask store, it must observe the @has_blocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10620) * store.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10621) */
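/* (Pairs with the smp_mb() in _nohz_idle_balance().) */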
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10622) smp_mb__after_atomic();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10624) set_cpu_sd_state_idle(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10626) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10627) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10628) * Each time a CPU enters idle, we assume that it has blocked load and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10629) * enable the periodic update of the load of idle CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10630) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10631) WRITE_ONCE(nohz.has_blocked, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10634) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10635) * Internal function that runs load balance for all idle CPUs. The load balance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10636) * can be a simple update of blocked load or a complete load balance with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10637) * task movement, depending on the flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10638) * The function returns false if the loop has stopped before running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10639) * through all idle CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10640) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10641) static bool _nohz_idle_balance(struct rq *this_rq, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10642) enum cpu_idle_type idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10644) /* Earliest time when we have to do rebalance again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10645) unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10646) unsigned long next_balance = now + 60*HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10647) bool has_blocked_load = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10648) int update_next_balance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10649) int this_cpu = this_rq->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10650) int balance_cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10651) int ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10652) struct rq *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10653)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10654) SCHED_WARN_ON((flags & NOHZ_KICK_MASK) == NOHZ_BALANCE_KICK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10656) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10657) * We assume there will be no idle load after this update and clear
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10658) * the has_blocked flag. If a CPU enters idle in the meantime, it will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10659) * set the has_blocked flag and trigger another update of idle load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10660) * Because a CPU that becomes idle is added to idle_cpus_mask before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10661) * setting the flag, we are sure not to clear the state of an idle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10662) * CPU whose load we have not checked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10663) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10664) WRITE_ONCE(nohz.has_blocked, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10666) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10667) * Ensures that if we miss the CPU, we must see the has_blocked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10668) * store from nohz_balance_enter_idle().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10669) */
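/* (Pairs with smp_mb__after_atomic() in nohz_balance_enter_idle().) */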
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10670) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10671)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10672) for_each_cpu(balance_cpu, nohz.idle_cpus_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10673) if (balance_cpu == this_cpu || !idle_cpu(balance_cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10674) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10676) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10677) * If this CPU gets work to do, stop the load balancing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10678) * work being done for other CPUs. The next load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10679) * balancing owner will pick it up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10680) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10681) if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10682) has_blocked_load = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10683) goto abort;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10684) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10686) rq = cpu_rq(balance_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10688) has_blocked_load |= update_nohz_stats(rq, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10690) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10691) * If time for next balance is due,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10692) * do the balance.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10693) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10694) if (time_after_eq(jiffies, rq->next_balance)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10695) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10697) rq_lock_irqsave(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10698) update_rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10699) rq_unlock_irqrestore(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10701) if (flags & NOHZ_BALANCE_KICK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10702) rebalance_domains(rq, CPU_IDLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10705) if (time_after(next_balance, rq->next_balance)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10706) next_balance = rq->next_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10707) update_next_balance = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10711) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10712) * next_balance will be updated only when there is a need.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10713) * When the CPU is attached to a null domain, for example, it will not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10714) * updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10715) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10716) if (likely(update_next_balance))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10717) nohz.next_balance = next_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10719) /* Newly idle CPU doesn't need an update */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10720) if (idle != CPU_NEWLY_IDLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10721) update_blocked_averages(this_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10722) has_blocked_load |= this_rq->has_blocked_load;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10725) if (flags & NOHZ_BALANCE_KICK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10726) rebalance_domains(this_rq, CPU_IDLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10727)
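/*
 * LOAD_AVG_PERIOD (32) is the PELT half-life, treated as ms here, so the
 * next blocked-load refresh is scheduled roughly one half-life from now.
 */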
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10728) WRITE_ONCE(nohz.next_blocked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10729) now + msecs_to_jiffies(LOAD_AVG_PERIOD));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10731) /* The full idle balance loop has been done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10732) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10734) abort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10735) /* There is still blocked load, enable periodic update */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10736) if (has_blocked_load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10737) WRITE_ONCE(nohz.has_blocked, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10739) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10740) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10742) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10743) * In the CONFIG_NO_HZ_COMMON case, the idle balance kickee will do the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10744) * rebalancing for all the CPUs whose scheduler ticks are stopped.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10745) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10746) static bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10747) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10748) unsigned int flags = this_rq->nohz_idle_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10750) if (!flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10751) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10753) this_rq->nohz_idle_balance = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10755) if (idle != CPU_IDLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10756) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10758) _nohz_idle_balance(this_rq, flags, idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10760) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10763) static void nohz_newidle_balance(struct rq *this_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10765) int this_cpu = this_rq->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10767) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10768) * This CPU doesn't want to be disturbed by scheduler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10769) * housekeeping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10770) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10771) if (!housekeeping_cpu(this_cpu, HK_FLAG_SCHED))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10772) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10774) /* Will wake up very soon. No time for doing anything else */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10775) if (this_rq->avg_idle < sysctl_sched_migration_cost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10776) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10778) /* Don't need to update blocked load of idle CPUs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10779) if (!READ_ONCE(nohz.has_blocked) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10780) time_before(jiffies, READ_ONCE(nohz.next_blocked)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10781) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10783) raw_spin_unlock(&this_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10784) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10785) * This CPU is going to be idle and the blocked load of idle CPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10786) * needs to be updated. Run the ilb locally as it is a good
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10787) * candidate for ilb instead of waking up another idle CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10788) * Kick a normal ilb if we failed to do the update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10789) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10790) if (!_nohz_idle_balance(this_rq, NOHZ_STATS_KICK, CPU_NEWLY_IDLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10791) kick_ilb(NOHZ_STATS_KICK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10792) raw_spin_lock(&this_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10795) #else /* !CONFIG_NO_HZ_COMMON */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10796) static inline void nohz_balancer_kick(struct rq *rq) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10798) static inline bool nohz_idle_balance(struct rq *this_rq, enum cpu_idle_type idle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10799) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10800) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10803) static inline void nohz_newidle_balance(struct rq *this_rq) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10804) #endif /* CONFIG_NO_HZ_COMMON */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10806) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10807) * newidle_balance() is called by schedule() if this_cpu is about to become
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10808) * idle. Attempts to pull tasks from other CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10809) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10810) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10811) * < 0 - we released the lock and there are !fair tasks present
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10812) * 0 - failed, no new tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10813) * > 0 - success, new (fair) tasks present
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10814) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10815) static int newidle_balance(struct rq *this_rq, struct rq_flags *rf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10816) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10817) unsigned long next_balance = jiffies + HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10818) int this_cpu = this_rq->cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10819) struct sched_domain *sd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10820) int pulled_task = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10821) u64 curr_cost = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10822) int done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10824) trace_android_rvh_sched_newidle_balance(this_rq, rf, &pulled_task, &done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10825) if (done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10826) return pulled_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10828) update_misfit_status(NULL, this_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10829) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10830) * We must set idle_stamp _before_ running the balance code below, such that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10831) * we measure its duration as idle time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10832) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10833) this_rq->idle_stamp = rq_clock(this_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10836) * Do not pull tasks towards !active CPUs...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10837) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10838) if (!cpu_active(this_cpu))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10839) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10841) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10842) * This is OK, because current is on_cpu, which avoids it being picked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10843) * for load-balance; preemption/IRQs are still disabled, avoiding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10844) * further scheduler activity on it; and we're being very careful to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10845) * re-start the picking loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10846) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10847) rq_unpin_lock(this_rq, rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10848)
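/*
 * Bail out early when the expected idle time is shorter than the cost of
 * one migration, or when no runqueue in the root domain is overloaded:
 * a full newidle balance would likely cost more than it gains.
 */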
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10849) if (this_rq->avg_idle < sysctl_sched_migration_cost ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10850) !READ_ONCE(this_rq->rd->overload)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10852) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10853) sd = rcu_dereference_check_sched_domain(this_rq->sd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10854) if (sd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10855) update_next_balance(sd, &next_balance);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10856) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10858) nohz_newidle_balance(this_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10860) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10861) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10863) raw_spin_unlock(&this_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10865) update_blocked_averages(this_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10866) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10867) for_each_domain(this_cpu, sd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10868) int continue_balancing = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10869) u64 t0, domain_cost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10870)
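/*
 * Stop once the time already spent plus this domain's worst-case
 * newidle-balance cost would exceed the expected idle time.
 */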
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10871) if (this_rq->avg_idle < curr_cost + sd->max_newidle_lb_cost) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10872) update_next_balance(sd, &next_balance);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10873) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10876) if (sd->flags & SD_BALANCE_NEWIDLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10877) t0 = sched_clock_cpu(this_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10879) pulled_task = load_balance(this_cpu, this_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10880) sd, CPU_NEWLY_IDLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10881) &continue_balancing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10883) domain_cost = sched_clock_cpu(this_cpu) - t0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10884) if (domain_cost > sd->max_newidle_lb_cost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10885) sd->max_newidle_lb_cost = domain_cost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10887) curr_cost += domain_cost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10890) update_next_balance(sd, &next_balance);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10892) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10893) * Stop searching for tasks to pull if there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10894) * now runnable tasks on this rq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10895) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10896) if (pulled_task || this_rq->nr_running > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10897) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10899) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10901) raw_spin_lock(&this_rq->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10903) if (curr_cost > this_rq->max_idle_balance_cost)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10904) this_rq->max_idle_balance_cost = curr_cost;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10906) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10907) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10908) * While browsing the domains we released the rq lock; a task could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10909) * have been enqueued in the meantime. Since we're not going idle,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10910) * pretend we pulled a task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10911) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10912) if (this_rq->cfs.h_nr_running && !pulled_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10913) pulled_task = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10915) /* Move the next balance forward */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10916) if (time_after(this_rq->next_balance, next_balance))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10917) this_rq->next_balance = next_balance;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10919) /* Is there a task of a high priority class? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10920) if (this_rq->nr_running != this_rq->cfs.h_nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10921) pulled_task = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10923) if (pulled_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10924) this_rq->idle_stamp = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10926) rq_repin_lock(this_rq, rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10928) return pulled_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10931) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10932) * run_rebalance_domains is triggered when needed from the scheduler tick.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10933) * Also triggered for nohz idle balancing (with nohz_balancing_kick set).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10934) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10935) static __latent_entropy void run_rebalance_domains(struct softirq_action *h)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10937) struct rq *this_rq = this_rq();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10938) enum cpu_idle_type idle = this_rq->idle_balance ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10939) CPU_IDLE : CPU_NOT_IDLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10941) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10942) * If this CPU has a pending nohz_balance_kick, then do the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10943) * balancing on behalf of the other idle CPUs whose ticks are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10944) * stopped. Do nohz_idle_balance *before* rebalance_domains to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10945) * give the idle CPUs a chance to load balance. Else we may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10946) * load balance only within the local sched_domain hierarchy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10947) * and abort nohz_idle_balance altogether if we pull some load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10948) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10949) if (nohz_idle_balance(this_rq, idle))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10950) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10952) /* normal load balance */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10953) update_blocked_averages(this_rq->cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10954) rebalance_domains(this_rq, idle);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10957) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10958) * Trigger the SCHED_SOFTIRQ if it is time to do periodic load balancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10959) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10960) void trigger_load_balance(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10962) /* Don't need to rebalance while attached to NULL domain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10963) if (unlikely(on_null_domain(rq)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10964) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10966) if (time_after_eq(jiffies, rq->next_balance))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10967) raise_softirq(SCHED_SOFTIRQ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10969) nohz_balancer_kick(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10972) static void rq_online_fair(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10974) update_sysctl();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10976) update_runtime_enabled(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10979) static void rq_offline_fair(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10981) update_sysctl();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10983) /* Ensure any throttled groups are reachable by pick_next_task */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10984) unthrottle_offline_cfs_rqs(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10987) #endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10989) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10990) * scheduler tick hitting a task of our scheduling class.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10991) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10992) * NOTE: This function can be called remotely by the tick offload that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10993) * goes along full dynticks. Therefore no local assumption can be made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10994) * and everything must be accessed through the @rq and @curr passed in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10995) * parameters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10996) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10997) static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10998) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10999) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11000) struct sched_entity *se = &curr->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11001)
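	/*
	 * Run the entity tick for @curr and for every group entity above
	 * it, so that each ancestor cfs_rq gets its accounting updated.
	 */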
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11002) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11003) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11004) entity_tick(cfs_rq, se, queued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11007) if (static_branch_unlikely(&sched_numa_balancing))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11008) task_tick_numa(rq, curr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11010) update_misfit_status(curr, rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11011) update_overutilized_status(task_rq(curr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11014) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11015) * called on fork with the child task as argument from the parent's context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11016) * - child not yet on the tasklist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11017) * - preemption disabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11018) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11019) static void task_fork_fair(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11021) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11022) struct sched_entity *se = &p->se, *curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11023) struct rq *rq = this_rq();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11024) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11026) rq_lock(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11027) update_rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11029) cfs_rq = task_cfs_rq(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11030) curr = cfs_rq->curr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11031) if (curr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11032) update_curr(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11033) se->vruntime = curr->vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11035) place_entity(cfs_rq, se, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11037) if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11038) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11039) * Upon rescheduling, sched_class::put_prev_task() will place
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11040) * 'current' within the tree based on its new key value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11041) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11042) swap(curr->vruntime, se->vruntime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11043) resched_curr(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11045)
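	/*
	 * The child may be woken up on a different CPU; make its vruntime
	 * relative here so that enqueue can re-add the min_vruntime of
	 * whichever cfs_rq it finally lands on.
	 */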
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11046) se->vruntime -= cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11047) rq_unlock(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11050) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11051) * Priority of the task has changed. Check to see if we preempt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11052) * the current task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11053) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11054) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11055) prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11056) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11057) if (!task_on_rq_queued(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11058) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11060) if (rq->cfs.nr_running == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11061) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11063) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11064) * Reschedule if we are currently running on this runqueue and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11065) * our priority decreased, or if we are not currently running on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11066) * this runqueue and our priority is higher than the current's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11067) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11068) if (rq->curr == p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11069) if (p->prio > oldprio)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11070) resched_curr(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11071) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11072) check_preempt_curr(rq, p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11075) static inline bool vruntime_normalized(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11076) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11077) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11079) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11080) * In both the TASK_ON_RQ_QUEUED and TASK_ON_RQ_MIGRATING cases,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11081) * the dequeue_entity(.flags=0) will already have normalized the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11082) * vruntime.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11083) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11084) if (p->on_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11085) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11087) /*
	 * When !on_rq, the vruntime of the task has usually NOT been normalized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11089) * But there are some cases where it has already been normalized:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11090) *
	 * - A forked child which is waiting to be woken up by
	 *   wake_up_new_task().
	 * - A task which has been woken up by try_to_wake_up() and is
	 *   waiting to actually be woken up by sched_ttwu_pending().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11096) if (!se->sum_exec_runtime ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11097) (p->state == TASK_WAKING && p->sched_remote_wakeup))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11098) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11100) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11103) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11104) /*
 * Propagate the changes of the sched_entity across the tg tree to make them
 * visible to the root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11107) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11108) static void propagate_entity_cfs_rq(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11109) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11110) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11112) list_add_leaf_cfs_rq(cfs_rq_of(se));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11114) /* Start to propagate at parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11115) se = se->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11117) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11118) cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11119)
		if (!cfs_rq_throttled(cfs_rq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11121) update_load_avg(cfs_rq, se, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11122) list_add_leaf_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11123) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11125)
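		/*
		 * cfs_rq is throttled: if list_add_leaf_cfs_rq() reports the
		 * branch up to the root as already fully connected, there is
		 * nothing left to link, so stop walking up.
		 */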
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11126) if (list_add_leaf_cfs_rq(cfs_rq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11127) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11130) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11131) static void propagate_entity_cfs_rq(struct sched_entity *se) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11132) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11134) static void detach_entity_cfs_rq(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11135) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11136) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11138) /* Catch up with the cfs_rq and remove our load when we leave */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11139) update_load_avg(cfs_rq, se, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11140) detach_entity_load_avg(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11141) update_tg_load_avg(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11142) propagate_entity_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11145) static void attach_entity_cfs_rq(struct sched_entity *se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11146) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11147) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11149) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11150) /*
	 * Since the real depth could have changed (only the FAIR
	 * class maintains the depth value), reset it properly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11154) se->depth = se->parent ? se->parent->depth + 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11155) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11157) /* Synchronize entity with its cfs_rq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11158) update_load_avg(cfs_rq, se, sched_feat(ATTACH_AGE_LOAD) ? 0 : SKIP_AGE_LOAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11159) attach_entity_load_avg(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11160) update_tg_load_avg(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11161) propagate_entity_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11164) static void detach_task_cfs_rq(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11166) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11167) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11169) if (!vruntime_normalized(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11170) /*
		 * Fix up our vruntime so that the current sleep doesn't
		 * cause an 'unlimited' sleep bonus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11173) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11174) place_entity(cfs_rq, se, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11175) se->vruntime -= cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11178) detach_entity_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11181) static void attach_task_cfs_rq(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11183) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11184) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11186) attach_entity_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11188) if (!vruntime_normalized(p))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11189) se->vruntime += cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11192) static void switched_from_fair(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11194) detach_task_cfs_rq(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11197) static void switched_to_fair(struct rq *rq, struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11199) attach_task_cfs_rq(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11201) if (task_on_rq_queued(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11202) /*
		 * We were most likely switched from sched_rt, so
		 * trigger a reschedule if we are running; otherwise just
		 * check whether we can still preempt the current task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11207) if (rq->curr == p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11208) resched_curr(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11209) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11210) check_preempt_curr(rq, p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11213)
/*
 * Account for a task changing its policy or group.
 *
 * This routine is mostly called to set the cfs_rq->curr field when a task
 * migrates between groups/classes.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11219) static void set_next_task_fair(struct rq *rq, struct task_struct *p, bool first)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11221) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11223) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11224) if (task_on_rq_queued(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11225) /*
		 * Move the next running task to the front of the list, so that
		 * our cfs_tasks list is kept in MRU order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11228) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11229) list_move(&se->group_node, &rq->cfs_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11230) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11231) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11233) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11234) struct cfs_rq *cfs_rq = cfs_rq_of(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11236) set_next_entity(cfs_rq, se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11237) /* ensure bandwidth has been allocated on our new cfs_rq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11238) account_cfs_rq_runtime(cfs_rq, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11242) void init_cfs_rq(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11243) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11244) cfs_rq->tasks_timeline = RB_ROOT_CACHED;
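	/*
	 * Start min_vruntime just below the u64 wrap point so that
	 * overflow-related bugs in vruntime comparisons show up early
	 * rather than only after long uptimes.
	 */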
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11245) cfs_rq->min_vruntime = (u64)(-(1LL << 20));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11246) #ifndef CONFIG_64BIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11247) cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11248) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11249) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11250) raw_spin_lock_init(&cfs_rq->removed.lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11251) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11254) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11255) static void task_set_group_fair(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11256) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11257) struct sched_entity *se = &p->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11259) set_task_rq(p, task_cpu(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11260) se->depth = se->parent ? se->parent->depth + 1 : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11261) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11263) static void task_move_group_fair(struct task_struct *p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11264) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11265) detach_task_cfs_rq(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11266) set_task_rq(p, task_cpu(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11268) #ifdef CONFIG_SMP
	/* Signal that se's cfs_rq has changed -- the task has migrated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11270) p->se.avg.last_update_time = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11271) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11272) attach_task_cfs_rq(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11273) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11275) static void task_change_group_fair(struct task_struct *p, int type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11276) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11277) switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11278) case TASK_SET_GROUP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11279) task_set_group_fair(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11280) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11281)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11282) case TASK_MOVE_GROUP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11283) task_move_group_fair(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11284) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11288) void free_fair_sched_group(struct task_group *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11289) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11290) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11292) destroy_cfs_bandwidth(tg_cfs_bandwidth(tg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11293)
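	/*
	 * alloc_fair_sched_group() may have failed partway through, so the
	 * per-CPU arrays and their entries can legitimately be NULL here.
	 */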
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11294) for_each_possible_cpu(i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11295) if (tg->cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11296) kfree(tg->cfs_rq[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11297) if (tg->se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11298) kfree(tg->se[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11301) kfree(tg->cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11302) kfree(tg->se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11305) int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11306) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11307) struct sched_entity *se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11308) struct cfs_rq *cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11309) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11310)
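	/*
	 * tg->cfs_rq and tg->se are per-CPU arrays of pointers; sizeof() on
	 * the pointer variables below is therefore intentional.
	 */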
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11311) tg->cfs_rq = kcalloc(nr_cpu_ids, sizeof(cfs_rq), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11312) if (!tg->cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11313) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11314) tg->se = kcalloc(nr_cpu_ids, sizeof(se), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11315) if (!tg->se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11316) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11318) tg->shares = NICE_0_LOAD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11320) init_cfs_bandwidth(tg_cfs_bandwidth(tg));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11322) for_each_possible_cpu(i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11323) cfs_rq = kzalloc_node(sizeof(struct cfs_rq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11324) GFP_KERNEL, cpu_to_node(i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11325) if (!cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11326) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11328) se = kzalloc_node(sizeof(struct sched_entity),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11329) GFP_KERNEL, cpu_to_node(i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11330) if (!se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11331) goto err_free_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11333) init_cfs_rq(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11334) init_tg_cfs_entry(tg, cfs_rq, se, i, parent->se[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11335) init_entity_runnable_average(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11338) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11340) err_free_rq:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11341) kfree(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11342) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11343) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11344) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11346) void online_fair_sched_group(struct task_group *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11348) struct sched_entity *se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11349) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11350) struct rq *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11351) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11353) for_each_possible_cpu(i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11354) rq = cpu_rq(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11355) se = tg->se[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11356) rq_lock_irq(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11357) update_rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11358) attach_entity_cfs_rq(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11359) sync_throttle(tg, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11360) rq_unlock_irq(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11364) void unregister_fair_sched_group(struct task_group *tg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11365) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11366) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11367) struct rq *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11368) int cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11370) for_each_possible_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11371) if (tg->se[cpu])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11372) remove_entity_load_avg(tg->se[cpu]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11374) /*
		 * Only empty task groups can be destroyed, so we can speculatively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11376) * check on_list without danger of it being re-added.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11377) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11378) if (!tg->cfs_rq[cpu]->on_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11379) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11381) rq = cpu_rq(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11383) raw_spin_lock_irqsave(&rq->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11384) list_del_leaf_cfs_rq(tg->cfs_rq[cpu]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11385) raw_spin_unlock_irqrestore(&rq->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11386) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11389) void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11390) struct sched_entity *se, int cpu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11391) struct sched_entity *parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11392) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11393) struct rq *rq = cpu_rq(cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11395) cfs_rq->tg = tg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11396) cfs_rq->rq = rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11397) init_cfs_rq_runtime(cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11399) tg->cfs_rq[cpu] = cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11400) tg->se[cpu] = se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11402) /* se could be NULL for root_task_group */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11403) if (!se)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11404) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11406) if (!parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11407) se->cfs_rq = &rq->cfs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11408) se->depth = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11409) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11410) se->cfs_rq = parent->my_q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11411) se->depth = parent->depth + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11414) se->my_q = cfs_rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11415) /* guarantee group entities always have weight */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11416) update_load_set(&se->load, NICE_0_LOAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11417) se->parent = parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11419)
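/* Serializes tg->shares updates done via sched_group_set_shares() */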
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11420) static DEFINE_MUTEX(shares_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11422) int sched_group_set_shares(struct task_group *tg, unsigned long shares)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11423) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11424) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11426) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11427) * We can't change the weight of the root cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11428) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11429) if (!tg->se[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11430) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11432) shares = clamp(shares, scale_load(MIN_SHARES), scale_load(MAX_SHARES));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11434) mutex_lock(&shares_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11435) if (tg->shares == shares)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11436) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11438) tg->shares = shares;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11439) for_each_possible_cpu(i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11440) struct rq *rq = cpu_rq(i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11441) struct sched_entity *se = tg->se[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11442) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11444) /* Propagate contribution to hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11445) rq_lock_irqsave(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11446) update_rq_clock(rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11447) for_each_sched_entity(se) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11448) update_load_avg(cfs_rq_of(se), se, UPDATE_TG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11449) update_cfs_group(se);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11451) rq_unlock_irqrestore(rq, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11454) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11455) mutex_unlock(&shares_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11456) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11458) #else /* CONFIG_FAIR_GROUP_SCHED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11460) void free_fair_sched_group(struct task_group *tg) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11462) int alloc_fair_sched_group(struct task_group *tg, struct task_group *parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11463) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11464) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11467) void online_fair_sched_group(struct task_group *tg) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11469) void unregister_fair_sched_group(struct task_group *tg) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11471) #endif /* CONFIG_FAIR_GROUP_SCHED */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11474) static unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11475) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11476) struct sched_entity *se = &task->se;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11477) unsigned int rr_interval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11479) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11480) * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11481) * idle runqueue:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11482) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11483) if (rq->cfs.load.weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11484) rr_interval = NS_TO_JIFFIES(sched_slice(cfs_rq_of(se), se));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11486) return rr_interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11489) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11490) * All the scheduling class methods:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11491) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11492) const struct sched_class fair_sched_class
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11493) __section("__fair_sched_class") = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11494) .enqueue_task = enqueue_task_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11495) .dequeue_task = dequeue_task_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11496) .yield_task = yield_task_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11497) .yield_to_task = yield_to_task_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11499) .check_preempt_curr = check_preempt_wakeup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11501) .pick_next_task = __pick_next_task_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11502) .put_prev_task = put_prev_task_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11503) .set_next_task = set_next_task_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11505) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11506) .balance = balance_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11507) .select_task_rq = select_task_rq_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11508) .migrate_task_rq = migrate_task_rq_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11510) .rq_online = rq_online_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11511) .rq_offline = rq_offline_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11513) .task_dead = task_dead_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11514) .set_cpus_allowed = set_cpus_allowed_common,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11515) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11517) .task_tick = task_tick_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11518) .task_fork = task_fork_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11520) .prio_changed = prio_changed_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11521) .switched_from = switched_from_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11522) .switched_to = switched_to_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11524) .get_rr_interval = get_rr_interval_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11526) .update_curr = update_curr_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11528) #ifdef CONFIG_FAIR_GROUP_SCHED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11529) .task_change_group = task_change_group_fair,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11530) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11532) #ifdef CONFIG_UCLAMP_TASK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11533) .uclamp_enabled = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11534) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11535) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11537) #ifdef CONFIG_SCHED_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11538) void print_cfs_stats(struct seq_file *m, int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11540) struct cfs_rq *cfs_rq, *pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11542) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11543) for_each_leaf_cfs_rq_safe(cpu_rq(cpu), cfs_rq, pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11544) print_cfs_rq(m, cpu, cfs_rq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11545) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11548) #ifdef CONFIG_NUMA_BALANCING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11549) void show_numa_stats(struct task_struct *p, struct seq_file *m)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11550) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11551) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11552) unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11553) struct numa_group *ng;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11555) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11556) ng = rcu_dereference(p->numa_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11557) for_each_online_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11558) if (p->numa_faults) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11559) tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11560) tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11562) if (ng) {
			gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11564) gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11566) print_numa_stats(m, node, tsf, tpf, gsf, gpf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11568) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11569) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11570) #endif /* CONFIG_NUMA_BALANCING */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11571) #endif /* CONFIG_SCHED_DEBUG */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11573) __init void init_sched_fair_class(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11574) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11575) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11576) open_softirq(SCHED_SOFTIRQ, run_rebalance_domains);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11578) #ifdef CONFIG_NO_HZ_COMMON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11579) nohz.next_balance = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11580) nohz.next_blocked = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11581) zalloc_cpumask_var(&nohz.idle_cpus_mask, GFP_NOWAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11582) #endif
#endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11587) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11588) * Helper functions to facilitate extracting info from tracepoints.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11589) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11590)
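/*
 * Example (sketch only, not built as part of this file): a module probe
 * attached to the bare pelt_cfs_tp tracepoint could use these helpers to
 * read per-cfs_rq PELT state without poking at scheduler internals:
 *
 *	static void probe_pelt_cfs(void *data, struct cfs_rq *cfs_rq)
 *	{
 *		const struct sched_avg *avg = sched_trace_cfs_rq_avg(cfs_rq);
 *		int cpu = sched_trace_cfs_rq_cpu(cfs_rq);
 *
 *		if (avg)
 *			trace_printk("cpu%d util_avg=%lu\n", cpu, avg->util_avg);
 *	}
 *
 * and registered with register_trace_pelt_cfs_tp(probe_pelt_cfs, NULL).
 */
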
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11591) const struct sched_avg *sched_trace_cfs_rq_avg(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11593) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11594) return cfs_rq ? &cfs_rq->avg : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11595) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11596) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11597) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11599) EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_avg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11601) char *sched_trace_cfs_rq_path(struct cfs_rq *cfs_rq, char *str, int len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11602) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11603) if (!cfs_rq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11604) if (str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11605) strlcpy(str, "(null)", len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11606) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11607) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11610) cfs_rq_tg_path(cfs_rq, str, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11611) return str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11613) EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11615) int sched_trace_cfs_rq_cpu(struct cfs_rq *cfs_rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11616) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11617) return cfs_rq ? cpu_of(rq_of(cfs_rq)) : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11618) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11619) EXPORT_SYMBOL_GPL(sched_trace_cfs_rq_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11621) const struct sched_avg *sched_trace_rq_avg_rt(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11622) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11623) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11624) return rq ? &rq->avg_rt : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11625) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11626) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11627) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11629) EXPORT_SYMBOL_GPL(sched_trace_rq_avg_rt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11631) const struct sched_avg *sched_trace_rq_avg_dl(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11632) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11633) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11634) return rq ? &rq->avg_dl : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11635) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11636) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11637) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11638) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11639) EXPORT_SYMBOL_GPL(sched_trace_rq_avg_dl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11641) const struct sched_avg *sched_trace_rq_avg_irq(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11643) #if defined(CONFIG_SMP) && defined(CONFIG_HAVE_SCHED_AVG_IRQ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11644) return rq ? &rq->avg_irq : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11645) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11646) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11647) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11649) EXPORT_SYMBOL_GPL(sched_trace_rq_avg_irq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11651) int sched_trace_rq_cpu(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11652) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11653) return rq ? cpu_of(rq) : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11655) EXPORT_SYMBOL_GPL(sched_trace_rq_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11657) int sched_trace_rq_cpu_capacity(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11658) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11659) return rq ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11660) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11661) rq->cpu_capacity
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11662) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11663) SCHED_CAPACITY_SCALE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11664) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11665) : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11667) EXPORT_SYMBOL_GPL(sched_trace_rq_cpu_capacity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11669) const struct cpumask *sched_trace_rd_span(struct root_domain *rd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11670) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11671) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11672) return rd ? rd->span : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11673) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11674) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11675) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11677) EXPORT_SYMBOL_GPL(sched_trace_rd_span);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11679) int sched_trace_rq_nr_running(struct rq *rq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11680) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11681) return rq ? rq->nr_running : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11683) EXPORT_SYMBOL_GPL(sched_trace_rq_nr_running);