Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

/*
 *  kernel/cpuset.c
 *
 *  Processor and Memory placement constraints for sets of tasks.
 *
 *  Copyright (C) 2003 BULL SA.
 *  Copyright (C) 2004-2007 Silicon Graphics, Inc.
 *  Copyright (C) 2006 Google, Inc
 *
 *  Portions derived from Patrick Mochel's sysfs code.
 *  sysfs is Copyright (c) 2001-3 Patrick Mochel
 *
 *  2003-10-10 Written by Simon Derr.
 *  2003-10-22 Updates by Stephen Hemminger.
 *  2004 May-July Rework by Paul Jackson.
 *  2006 Rework by Paul Menage to use generic cgroups
 *  2008 Rework of the scheduler domains and CPU hotplug handling
 *       by Max Krasnyansky
 *
 *  This file is subject to the terms and conditions of the GNU General Public
 *  License.  See the file COPYING in the main directory of the Linux
 *  distribution for more details.
 */

#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/cpuset.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/init.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/mempolicy.h>
#include <linux/mm.h>
#include <linux/memory.h>
#include <linux/export.h>
#include <linux/mount.h>
#include <linux/fs_context.h>
#include <linux/namei.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>
#include <linux/sched/deadline.h>
#include <linux/sched/mm.h>
#include <linux/sched/task.h>
#include <linux/seq_file.h>
#include <linux/security.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/time.h>
#include <linux/time64.h>
#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/oom.h>
#include <linux/sched/isolation.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <linux/mutex.h>
#include <linux/cgroup.h>
#include <linux/wait.h>

#include <trace/hooks/sched.h>
#include <trace/hooks/cgroup.h>

DEFINE_STATIC_KEY_FALSE(cpusets_pre_enable_key);
DEFINE_STATIC_KEY_FALSE(cpusets_enabled_key);

/* See "Frequency meter" comments, below. */

struct fmeter {
	int cnt;		/* unprocessed events count */
	int val;		/* most recent output value */
	time64_t time;		/* clock (secs) when val computed */
	spinlock_t lock;	/* guards read or write of above */
};

struct cpuset {
	struct cgroup_subsys_state css;

	unsigned long flags;		/* "unsigned long" so bitops work */

	/*
	 * On default hierarchy:
	 *
	 * The user-configured masks can only be changed by writing to
	 * cpuset.cpus and cpuset.mems, and won't be limited by the
	 * parent masks.
	 *
	 * The effective masks are the real masks that apply to the tasks
	 * in the cpuset. They may be changed if the configured masks are
	 * changed or hotplug happens.
	 *
	 * effective_mask == configured_mask & parent's effective_mask,
	 * and if it ends up empty, it will inherit the parent's mask.
	 *
	 *
	 * On legacy hierarchy:
	 *
	 * The user-configured masks are always the same as the effective masks.
	 */

	/* user-configured CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t cpus_allowed;
	cpumask_var_t cpus_requested;
	nodemask_t mems_allowed;

	/* effective CPUs and Memory Nodes allowed to tasks */
	cpumask_var_t effective_cpus;
	nodemask_t effective_mems;

	/*
	 * CPUs allocated to child sub-partitions (default hierarchy only)
	 * - CPUs granted by the parent = effective_cpus U subparts_cpus
	 * - effective_cpus and subparts_cpus are mutually exclusive.
	 *
	 * effective_cpus contains only onlined CPUs, but subparts_cpus
	 * may have offlined ones.
	 */
	cpumask_var_t subparts_cpus;

	/*
	 * This is the old Memory Nodes that tasks took on.
	 *
	 * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
	 * - A new cpuset's old_mems_allowed is initialized when some
	 *   task is moved into it.
	 * - old_mems_allowed is used in cpuset_migrate_mm() when we change
	 *   cpuset.mems_allowed and have tasks' nodemask updated, and
	 *   then old_mems_allowed is updated to mems_allowed.
	 */
	nodemask_t old_mems_allowed;

	struct fmeter fmeter;		/* memory_pressure filter */

	/*
	 * Tasks are being attached to this cpuset.  Used to prevent
	 * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
	 */
	int attach_in_progress;

	/* partition number for rebuild_sched_domains() */
	int pn;

	/* for custom sched domain */
	int relax_domain_level;

	/* number of CPUs in subparts_cpus */
	int nr_subparts_cpus;

	/* partition root state */
	int partition_root_state;

	/*
	 * Default hierarchy only:
	 * use_parent_ecpus - set if using parent's effective_cpus
	 * child_ecpus_count - # of children with use_parent_ecpus set
	 */
	int use_parent_ecpus;
	int child_ecpus_count;
};
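
/*
 * Illustrative sketch (not part of the original file): how an effective CPU
 * mask is derived per the comment block in struct cpuset above, i.e.
 * effective = configured & parent's effective, falling back to the parent's
 * effective mask when the intersection ends up empty.  The real update paths
 * later in this file do this hierarchically under cpuset_mutex; the helper
 * name below is hypothetical.
 */
static inline void example_compute_effective_cpus(struct cpumask *new_cpus,
						  struct cpuset *cs,
						  struct cpuset *parent)
{
	/* intersect the user-configured mask with what the parent grants */
	cpumask_and(new_cpus, cs->cpus_allowed, parent->effective_cpus);

	/* an empty result means: inherit the parent's effective mask */
	if (cpumask_empty(new_cpus))
		cpumask_copy(new_cpus, parent->effective_cpus);
}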

/*
 * Partition root states:
 *
 *   0 - not a partition root
 *
 *   1 - partition root
 *
 *  -1 - invalid partition root
 *       None of the cpus in cpus_allowed can be put into the parent's
 *       subparts_cpus. In this case, the cpuset is not a real partition
 *       root anymore.  However, the CPU_EXCLUSIVE bit will still be set
 *       and the cpuset can be restored back to a partition root if the
 *       parent cpuset can give more CPUs back to this child cpuset.
 */
#define PRS_DISABLED		0
#define PRS_ENABLED		1
#define PRS_ERROR		-1

/*
 * Temporary cpumasks for working with partitions that are passed among
 * functions to avoid memory allocation in inner functions.
 */
struct tmpmasks {
	cpumask_var_t addmask, delmask;	/* For partition root */
	cpumask_var_t new_cpus;		/* For update_cpumasks_hier() */
};

static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
{
	return css ? container_of(css, struct cpuset, css) : NULL;
}

/* Retrieve the cpuset for a task */
static inline struct cpuset *task_cs(struct task_struct *task)
{
	return css_cs(task_css(task, cpuset_cgrp_id));
}

static inline struct cpuset *parent_cs(struct cpuset *cs)
{
	return css_cs(cs->css.parent);
}

/* bits in struct cpuset flags field */
typedef enum {
	CS_ONLINE,
	CS_CPU_EXCLUSIVE,
	CS_MEM_EXCLUSIVE,
	CS_MEM_HARDWALL,
	CS_MEMORY_MIGRATE,
	CS_SCHED_LOAD_BALANCE,
	CS_SPREAD_PAGE,
	CS_SPREAD_SLAB,
} cpuset_flagbits_t;

/* convenient tests for these bits */
static inline bool is_cpuset_online(struct cpuset *cs)
{
	return test_bit(CS_ONLINE, &cs->flags) && !css_is_dying(&cs->css);
}

static inline int is_cpu_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_exclusive(const struct cpuset *cs)
{
	return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
}

static inline int is_mem_hardwall(const struct cpuset *cs)
{
	return test_bit(CS_MEM_HARDWALL, &cs->flags);
}

static inline int is_sched_load_balance(const struct cpuset *cs)
{
	return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}

static inline int is_memory_migrate(const struct cpuset *cs)
{
	return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
}

static inline int is_spread_page(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_PAGE, &cs->flags);
}

static inline int is_spread_slab(const struct cpuset *cs)
{
	return test_bit(CS_SPREAD_SLAB, &cs->flags);
}

static inline int is_partition_root(const struct cpuset *cs)
{
	return cs->partition_root_state > 0;
}

static struct cpuset top_cpuset = {
	.flags = ((1 << CS_ONLINE) | (1 << CS_CPU_EXCLUSIVE) |
		  (1 << CS_MEM_EXCLUSIVE)),
	.partition_root_state = PRS_ENABLED,
};

/**
 * cpuset_for_each_child - traverse online children of a cpuset
 * @child_cs: loop cursor pointing to the current child
 * @pos_css: used for iteration
 * @parent_cs: target cpuset to walk children of
 *
 * Walk @child_cs through the online children of @parent_cs.  Must be used
 * with RCU read locked.
 */
#define cpuset_for_each_child(child_cs, pos_css, parent_cs)		\
	css_for_each_child((pos_css), &(parent_cs)->css)		\
		if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))

/**
 * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
 * @des_cs: loop cursor pointing to the current descendant
 * @pos_css: used for iteration
 * @root_cs: target cpuset to walk descendants of
 *
 * Walk @des_cs through the online descendants of @root_cs.  Must be used
 * with RCU read locked.  The caller may modify @pos_css by calling
 * css_rightmost_descendant() to skip a subtree.  @root_cs is included in the
 * iteration and is the first node to be visited.
 */
#define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs)	\
	css_for_each_descendant_pre((pos_css), &(root_cs)->css)		\
		if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))

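/*
 * Illustrative usage sketch (not part of the original file): both iteration
 * macros above expect the RCU read lock to be held, and the _pre variant
 * visits @root_cs itself first.  The function name below is hypothetical.
 */
static void example_walk_online_descendants(struct cpuset *root_cs)
{
	struct cgroup_subsys_state *pos_css;
	struct cpuset *cp;

	rcu_read_lock();
	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
		/* skip an entire subtree that has no CPUs configured */
		if (cpumask_empty(cp->cpus_allowed)) {
			pos_css = css_rightmost_descendant(pos_css);
			continue;
		}
		/* ... inspect cp under RCU here ... */
	}
	rcu_read_unlock();
}
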
/*
 * There are two global locks guarding cpuset structures - cpuset_mutex and
 * callback_lock. We also require taking task_lock() when dereferencing a
 * task's cpuset pointer. See "The task_lock() exception", at the end of this
 * comment.
 *
 * A task must hold both locks to modify cpusets.  If a task holds
 * cpuset_mutex, then it blocks others wanting that mutex, ensuring that it
 * is the only task able to also acquire callback_lock and be able to
 * modify cpusets.  It can perform various checks on the cpuset structure
 * first, knowing nothing will change.  It can also allocate memory while
 * just holding cpuset_mutex.  While it is performing these checks, various
 * callback routines can briefly acquire callback_lock to query cpusets.
 * Once it is ready to make the changes, it takes callback_lock, blocking
 * everyone else.
 *
 * Calls to the kernel memory allocator can not be made while holding
 * callback_lock, as that would risk double tripping on callback_lock
 * from one of the callbacks into the cpuset code from within
 * __alloc_pages().
 *
 * If a task is only holding callback_lock, then it has read-only
 * access to cpusets.
 *
 * Now, the task_struct fields mems_allowed and mempolicy may be changed
 * by other tasks, so we use alloc_lock in the task_struct fields to protect
 * them.
 *
 * The cpuset_common_file_read() handlers only hold callback_lock across
 * small pieces of code, such as when reading out possibly multi-word
 * cpumasks and nodemasks.
 *
 * Accessing a task's cpuset should be done in accordance with the
 * guidelines for accessing subsystem state in kernel/cgroup.c
 */

static DEFINE_MUTEX(cpuset_mutex);
static DEFINE_SPINLOCK(callback_lock);
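
/*
 * Illustrative sketch (not part of the original file) of the write-side
 * pattern the locking comment above describes: take cpuset_mutex first, do
 * checks and allocations, then take callback_lock (irq-safe) only for the
 * short critical section that publishes the change.  The function name and
 * the "..." bodies are hypothetical placeholders.
 */
static void example_modify_cpuset(struct cpuset *cs)
{
	mutex_lock(&cpuset_mutex);

	/* validation and memory allocation are allowed here */

	spin_lock_irq(&callback_lock);
	/* ... update cs->effective_cpus / cs->effective_mems ... */
	spin_unlock_irq(&callback_lock);

	mutex_unlock(&cpuset_mutex);
}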

static struct workqueue_struct *cpuset_migrate_mm_wq;

/*
 * CPU / memory hotplug is handled asynchronously;
 * resume_cpus is handled synchronously.
 */
static DECLARE_WORK(cpuset_hotplug_work, cpuset_hotplug_workfn);

static DECLARE_WAIT_QUEUE_HEAD(cpuset_attach_wq);

/*
 * Cgroup v2 behavior is used on the "cpus" and "mems" control files when
 * on default hierarchy or when the cpuset_v2_mode flag is set by mounting
 * the v1 cpuset cgroup filesystem with the "cpuset_v2_mode" mount option.
 * With v2 behavior, "cpus" and "mems" are always what the users have
 * requested and won't be changed by hotplug events. Only the effective
 * cpus or mems will be affected.
 */
static inline bool is_in_v2_mode(void)
{
	return cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
	      (cpuset_cgrp_subsys.root->flags & CGRP_ROOT_CPUSET_V2_MODE);
}

/*
 * Return in pmask the portion of a task's cpuset's cpus_allowed that
 * are online and are capable of running the task.  If none are found,
 * walk up the cpuset hierarchy until we find one that does have some
 * appropriate cpus.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of cpu_active_mask.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_cpus(struct task_struct *tsk,
				  struct cpumask *pmask)
{
	const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
	struct cpuset *cs;

	if (WARN_ON(!cpumask_and(pmask, possible_mask, cpu_active_mask)))
		cpumask_copy(pmask, cpu_active_mask);

	rcu_read_lock();
	cs = task_cs(tsk);

	while (!cpumask_intersects(cs->effective_cpus, pmask)) {
		cs = parent_cs(cs);
		if (unlikely(!cs)) {
			/*
			 * The top cpuset doesn't have any online cpu as a
			 * consequence of a race between cpuset_hotplug_work
			 * and cpu hotplug notifier.  But we know the top
			 * cpuset's effective_cpus is on its way to be
			 * identical to cpu_online_mask.
			 */
			goto out_unlock;
		}
	}
	cpumask_and(pmask, pmask, cs->effective_cpus);

out_unlock:
	rcu_read_unlock();
}

/*
 * Return in *pmask the portion of a cpuset's mems_allowed that
 * are online, with memory.  If none are online with memory, walk
 * up the cpuset hierarchy until we find one that does have some
 * online mems.  The top cpuset always has some mems online.
 *
 * One way or another, we guarantee to return some non-empty subset
 * of node_states[N_MEMORY].
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void guarantee_online_mems(struct cpuset *cs, nodemask_t *pmask)
{
	while (!nodes_intersects(cs->effective_mems, node_states[N_MEMORY]))
		cs = parent_cs(cs);
	nodes_and(*pmask, cs->effective_mems, node_states[N_MEMORY]);
}

/*
 * Update a task's spread flags to match its cpuset's page/slab spread flags.
 *
 * Call with callback_lock or cpuset_mutex held.
 */
static void cpuset_update_task_spread_flag(struct cpuset *cs,
					struct task_struct *tsk)
{
	if (is_spread_page(cs))
		task_set_spread_page(tsk);
	else
		task_clear_spread_page(tsk);

	if (is_spread_slab(cs))
		task_set_spread_slab(tsk);
	else
		task_clear_spread_slab(tsk);
}

/*
 * is_cpuset_subset(p, q) - Is cpuset p a subset of cpuset q?
 *
 * One cpuset is a subset of another if all its allowed CPUs and
 * Memory Nodes are a subset of the other, and its exclusive flags
 * are only set if the other's are set.  Call holding cpuset_mutex.
 */

static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
{
	return	cpumask_subset(p->cpus_requested, q->cpus_requested) &&
		nodes_subset(p->mems_allowed, q->mems_allowed) &&
		is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
		is_mem_exclusive(p) <= is_mem_exclusive(q);
}

/**
 * alloc_cpumasks - allocate three cpumasks for cpuset
 * @cs:  the cpuset that has cpumasks to be allocated.
 * @tmp: the tmpmasks structure pointer
 * Return: 0 if successful, -ENOMEM otherwise.
 *
 * Only one of the two input arguments should be non-NULL.
 */
static inline int alloc_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
{
	cpumask_var_t *pmask1, *pmask2, *pmask3;

	if (cs) {
		pmask1 = &cs->cpus_allowed;
		pmask2 = &cs->effective_cpus;
		pmask3 = &cs->subparts_cpus;
	} else {
		pmask1 = &tmp->new_cpus;
		pmask2 = &tmp->addmask;
		pmask3 = &tmp->delmask;
	}

	if (!zalloc_cpumask_var(pmask1, GFP_KERNEL))
		return -ENOMEM;

	if (!zalloc_cpumask_var(pmask2, GFP_KERNEL))
		goto free_one;

	if (!zalloc_cpumask_var(pmask3, GFP_KERNEL))
		goto free_two;

	if (cs && !zalloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
		goto free_three;

	return 0;

free_three:
	free_cpumask_var(*pmask3);
free_two:
	free_cpumask_var(*pmask2);
free_one:
	free_cpumask_var(*pmask1);
	return -ENOMEM;
}

/**
 * free_cpumasks - free cpumasks in a cpuset or a tmpmasks structure
 * @cs:  the cpuset that has cpumasks to be freed.
 * @tmp: the tmpmasks structure pointer
 */
static inline void free_cpumasks(struct cpuset *cs, struct tmpmasks *tmp)
{
	if (cs) {
		free_cpumask_var(cs->cpus_allowed);
		free_cpumask_var(cs->cpus_requested);
		free_cpumask_var(cs->effective_cpus);
		free_cpumask_var(cs->subparts_cpus);
	}
	if (tmp) {
		free_cpumask_var(tmp->new_cpus);
		free_cpumask_var(tmp->addmask);
		free_cpumask_var(tmp->delmask);
	}
}
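
/*
 * Illustrative usage sketch (not part of the original file): the tmpmasks
 * variant of the two helpers above is meant to be paired around code that
 * needs scratch cpumasks without allocating in inner functions.  The
 * function name is hypothetical.
 */
static int example_with_tmpmasks(void)
{
	struct tmpmasks tmp;

	if (alloc_cpumasks(NULL, &tmp))	/* allocates new_cpus/addmask/delmask */
		return -ENOMEM;

	/* ... use tmp.new_cpus, tmp.addmask, tmp.delmask as scratch space ... */

	free_cpumasks(NULL, &tmp);	/* frees only the tmpmasks cpumasks */
	return 0;
}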

/**
 * alloc_trial_cpuset - allocate a trial cpuset
 * @cs: the cpuset that the trial cpuset duplicates
 */
static struct cpuset *alloc_trial_cpuset(struct cpuset *cs)
{
	struct cpuset *trial;

	trial = kmemdup(cs, sizeof(*cs), GFP_KERNEL);
	if (!trial)
		return NULL;

	if (alloc_cpumasks(trial, NULL)) {
		kfree(trial);
		return NULL;
	}

	cpumask_copy(trial->cpus_allowed, cs->cpus_allowed);
	cpumask_copy(trial->cpus_requested, cs->cpus_requested);
	cpumask_copy(trial->effective_cpus, cs->effective_cpus);
	return trial;
}

/**
 * free_cpuset - free the cpuset
 * @cs: the cpuset to be freed
 */
static inline void free_cpuset(struct cpuset *cs)
{
	free_cpumasks(cs, NULL);
	kfree(cs);
}

/*
 * validate_change() - Used to validate that any proposed cpuset change
 *		       follows the structural rules for cpusets.
 *
 * If we replaced the flag and mask values of the current cpuset
 * (cur) with those values in the trial cpuset (trial), would
 * our various subset and exclusive rules still be valid?  Presumes
 * cpuset_mutex held.
 *
 * 'cur' is the address of an actual, in-use cpuset.  Operations
 * such as list traversal that depend on the actual address of the
 * cpuset in the list must use cur below, not trial.
 *
 * 'trial' is the address of a bulk structure copy of cur, with
 * perhaps one or more of the fields cpus_allowed, mems_allowed,
 * or flags changed to new, trial values.
 *
 * Return 0 if valid, -errno if not.
 */

static int validate_change(struct cpuset *cur, struct cpuset *trial)
{
	struct cgroup_subsys_state *css;
	struct cpuset *c, *par;
	int ret;

	rcu_read_lock();

	/* Each of our child cpusets must be a subset of us */
	ret = -EBUSY;
	cpuset_for_each_child(c, css, cur)
		if (!is_cpuset_subset(c, trial))
			goto out;

	/* Remaining checks don't apply to root cpuset */
	ret = 0;
	if (cur == &top_cpuset)
		goto out;

	par = parent_cs(cur);

	/* On legacy hierarchy, we must be a subset of our parent cpuset. */
	ret = -EACCES;
	if (!is_in_v2_mode() && !is_cpuset_subset(trial, par))
		goto out;

	/*
	 * If either I or some sibling (!= me) is exclusive, we can't
	 * overlap
	 */
	ret = -EINVAL;
	cpuset_for_each_child(c, css, par) {
		if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
		    c != cur &&
		    cpumask_intersects(trial->cpus_requested, c->cpus_requested))
			goto out;
		if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
		    c != cur &&
		    nodes_intersects(trial->mems_allowed, c->mems_allowed))
			goto out;
	}

	/*
	 * Cpusets with tasks - existing or newly being attached - can't
	 * be changed to have empty cpus_allowed or mems_allowed.
	 */
	ret = -ENOSPC;
	if ((cgroup_is_populated(cur->css.cgroup) || cur->attach_in_progress)) {
		if (!cpumask_empty(cur->cpus_allowed) &&
		    cpumask_empty(trial->cpus_allowed))
			goto out;
		if (!nodes_empty(cur->mems_allowed) &&
		    nodes_empty(trial->mems_allowed))
			goto out;
	}

	/*
	 * We can't shrink if we won't have enough room for SCHED_DEADLINE
	 * tasks.
	 */
	ret = -EBUSY;
	if (is_cpu_exclusive(cur) &&
	    !cpuset_cpumask_can_shrink(cur->cpus_allowed,
				       trial->cpus_allowed))
		goto out;

	ret = 0;
out:
	rcu_read_unlock();
	return ret;
}
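
/*
 * Illustrative sketch (not part of the original file) of how the trial
 * cpuset helpers above are typically combined: duplicate the live cpuset,
 * apply the proposed change to the copy, and only commit it if
 * validate_change() accepts it.  The function name and the committing step
 * are hypothetical placeholders; the caller must hold cpuset_mutex.
 */
static int example_try_set_cpus(struct cpuset *cs, const struct cpumask *new_req)
{
	struct cpuset *trial;
	int err;

	trial = alloc_trial_cpuset(cs);	/* duplicate cs, masks included */
	if (!trial)
		return -ENOMEM;

	cpumask_copy(trial->cpus_requested, new_req);	/* proposed change */

	err = validate_change(cs, trial);	/* check subset/exclusive rules */
	if (!err) {
		/* ... commit the new masks to cs under callback_lock ... */
	}

	free_cpuset(trial);
	return err;
}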

#ifdef CONFIG_SMP
/*
 * Helper routine for generate_sched_domains().
 * Do cpusets a, b have overlapping effective cpus_allowed masks?
 */
static int cpusets_overlap(struct cpuset *a, struct cpuset *b)
{
	return cpumask_intersects(a->effective_cpus, b->effective_cpus);
}

static void
update_domain_attr(struct sched_domain_attr *dattr, struct cpuset *c)
{
	if (dattr->relax_domain_level < c->relax_domain_level)
		dattr->relax_domain_level = c->relax_domain_level;
	return;
}

static void update_domain_attr_tree(struct sched_domain_attr *dattr,
				    struct cpuset *root_cs)
{
	struct cpuset *cp;
	struct cgroup_subsys_state *pos_css;

	rcu_read_lock();
	cpuset_for_each_descendant_pre(cp, pos_css, root_cs) {
		/* skip the whole subtree if @cp doesn't have any CPU */
		if (cpumask_empty(cp->cpus_allowed)) {
			pos_css = css_rightmost_descendant(pos_css);
			continue;
		}

		if (is_sched_load_balance(cp))
			update_domain_attr(dattr, cp);
	}
	rcu_read_unlock();
}

/* Must be called with cpuset_mutex held.  */
static inline int nr_cpusets(void)
{
	/* jump label reference count + the top-level cpuset */
	return static_key_count(&cpusets_enabled_key.key) + 1;
}

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698)  * generate_sched_domains()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700)  * This function builds a partial partition of the systems CPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701)  * A 'partial partition' is a set of non-overlapping subsets whose
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702)  * union is a subset of that set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703)  * The output of this function needs to be passed to kernel/sched/core.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704)  * partition_sched_domains() routine, which will rebuild the scheduler's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705)  * load balancing domains (sched domains) as specified by that partial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706)  * partition.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708)  * See "What is sched_load_balance" in Documentation/admin-guide/cgroup-v1/cpusets.rst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709)  * for a background explanation of this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711)  * Does not return errors, on the theory that the callers of this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712)  * routine would rather not worry about failures to rebuild sched
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713)  * domains when operating in the severe memory shortage situations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714)  * that could cause allocation failures below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716)  * Must be called with cpuset_mutex held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718)  * The three key local variables below are:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719)  *    cp - cpuset pointer, used (together with pos_css) to perform a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720)  *	   top-down scan of all cpusets. For our purposes, rebuilding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721)  *	   the schedulers sched domains, we can ignore !is_sched_load_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722)  *	   balance cpusets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723)  *  csa  - (for CpuSet Array) Array of pointers to all the cpusets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724)  *	   that need to be load balanced, for convenient iterative
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725)  *	   access by the subsequent code that finds the best partition,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726)  *	   i.e the set of domains (subsets) of CPUs such that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727)  *	   cpus_allowed of every cpuset marked is_sched_load_balance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728)  *	   is a subset of one of these domains, while there are as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729)  *	   many such domains as possible, each as small as possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730)  * doms  - Conversion of 'csa' to an array of cpumasks, for passing to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731)  *	   the kernel/sched/core.c routine partition_sched_domains() in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732)  *	   convenient format, that can be easily compared to the prior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733)  *	   value to determine what partition elements (sched domains)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734)  *	   were changed (added or removed.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736)  * Finding the best partition (set of domains):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737)  *	The triple nested loops below over i, j, k scan over the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738)  *	load balanced cpusets (using the array of cpuset pointers in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739)  *	csa[]) looking for pairs of cpusets that have overlapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740)  *	cpus_allowed, but which don't have the same 'pn' partition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741)  *	number, and gives them the same partition number.  It keeps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742)  *	looping on the 'restart' label until it can no longer find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743)  *	any such pairs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745)  *	The union of the cpus_allowed masks from the set of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746)  *	all cpusets having the same 'pn' value then form the one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747)  *	element of the partition (one sched domain) to be passed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748)  *	partition_sched_domains().
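 *
 *	For example, with three load-balanced cpusets whose cpus_allowed
 *	are 0-3, 2-5 and 8-11, the first two overlap and are merged into
 *	the same 'pn'; the result is two sched domains: 0-5 and 8-11.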
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) static int generate_sched_domains(cpumask_var_t **domains,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 			struct sched_domain_attr **attributes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	struct cpuset *cp;	/* top-down scan of cpusets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	struct cpuset **csa;	/* array of all cpuset ptrs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	int csn;		/* how many cpuset ptrs in csa so far */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	int i, j, k;		/* indices for partition finding loops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	cpumask_var_t *doms;	/* resulting partition; i.e. sched domains */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	struct sched_domain_attr *dattr;  /* attributes for custom domains */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	int ndoms = 0;		/* number of sched domains in result */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	int nslot;		/* next empty doms[] struct cpumask slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	bool root_load_balance = is_sched_load_balance(&top_cpuset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 	doms = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	dattr = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	csa = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	/* Special case for the 99% of systems with one, full, sched domain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	if (root_load_balance && !top_cpuset.nr_subparts_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 		ndoms = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		doms = alloc_sched_domains(ndoms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		if (!doms)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 		dattr = kmalloc(sizeof(struct sched_domain_attr), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 		if (dattr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 			*dattr = SD_ATTR_INIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 			update_domain_attr_tree(dattr, &top_cpuset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 		}
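		/* Keep only housekeeping CPUs (HK_FLAG_DOMAIN); isolated CPUs are excluded */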
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 		cpumask_and(doms[0], top_cpuset.effective_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 			    housekeeping_cpumask(HK_FLAG_DOMAIN));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 	csa = kmalloc_array(nr_cpusets(), sizeof(cp), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 	if (!csa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	csn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	if (root_load_balance)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 		csa[csn++] = &top_cpuset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	cpuset_for_each_descendant_pre(cp, pos_css, &top_cpuset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 		if (cp == &top_cpuset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 		 * Continue traversing beyond @cp iff @cp has some CPUs and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 		 * isn't load balancing.  The former is obvious.  The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 		 * latter: All child cpusets contain a subset of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 		 * parent's cpus, so just skip them, and then we call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 		 * update_domain_attr_tree() to calc relax_domain_level of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 		 * the corresponding sched domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 		 * If root is load-balancing, we can skip @cp if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 		 * is a subset of the root's effective_cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 		if (!cpumask_empty(cp->cpus_allowed) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		    !(is_sched_load_balance(cp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		      cpumask_intersects(cp->cpus_allowed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 					 housekeeping_cpumask(HK_FLAG_DOMAIN))))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 		if (root_load_balance &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		    cpumask_subset(cp->cpus_allowed, top_cpuset.effective_cpus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 		if (is_sched_load_balance(cp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		    !cpumask_empty(cp->effective_cpus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 			csa[csn++] = cp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		/* skip @cp's subtree if not a partition root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 		if (!is_partition_root(cp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 			pos_css = css_rightmost_descendant(pos_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	for (i = 0; i < csn; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 		csa[i]->pn = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	ndoms = csn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	/* Find the best partition (set of sched domains) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	for (i = 0; i < csn; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		struct cpuset *a = csa[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		int apn = a->pn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 		for (j = 0; j < csn; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 			struct cpuset *b = csa[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 			int bpn = b->pn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 			if (apn != bpn && cpusets_overlap(a, b)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 				for (k = 0; k < csn; k++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 					struct cpuset *c = csa[k];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 					if (c->pn == bpn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 						c->pn = apn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 				ndoms--;	/* one less element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 				goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	 * Now we know how many domains to create.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	 * Convert <csn, csa> to <ndoms, doms> and populate cpu masks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	doms = alloc_sched_domains(ndoms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	if (!doms)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	 * The rest of the code, including the scheduler, can deal with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	 * dattr==NULL case. No need to abort if alloc fails.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	dattr = kmalloc_array(ndoms, sizeof(struct sched_domain_attr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 			      GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	for (nslot = 0, i = 0; i < csn; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		struct cpuset *a = csa[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		struct cpumask *dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		int apn = a->pn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		if (apn < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 			/* Skip completed partitions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 		dp = doms[nslot];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 		if (nslot == ndoms) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 			static int warnings = 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 			if (warnings) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 				pr_warn("rebuild_sched_domains confused: nslot %d, ndoms %d, csn %d, i %d, apn %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 					nslot, ndoms, csn, i, apn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 				warnings--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		cpumask_clear(dp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		if (dattr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 			*(dattr + nslot) = SD_ATTR_INIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 		for (j = i; j < csn; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 			struct cpuset *b = csa[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 			if (apn == b->pn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 				cpumask_or(dp, dp, b->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 				cpumask_and(dp, dp, housekeeping_cpumask(HK_FLAG_DOMAIN));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 				if (dattr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 					update_domain_attr_tree(dattr + nslot, b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 				/* Done with this partition */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 				b->pn = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 		nslot++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	BUG_ON(nslot != ndoms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	kfree(csa);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	 * Fallback to the default domain if kmalloc() failed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	 * See comments in partition_sched_domains().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	if (doms == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 		ndoms = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	*domains    = doms;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	*attributes = dattr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	return ndoms;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) static void update_tasks_root_domain(struct cpuset *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	struct css_task_iter it;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	css_task_iter_start(&cs->css, 0, &it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	while ((task = css_task_iter_next(&it)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 		dl_add_task_root_domain(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	css_task_iter_end(&it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) static void rebuild_root_domains(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	struct cpuset *cs = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	lockdep_assert_held(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	lockdep_assert_cpus_held();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	lockdep_assert_held(&sched_domains_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	 * Clear default root domain DL accounting, it will be computed again
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	 * if a task belongs to it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	dl_clear_root_domain(&def_root_domain);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		if (cpumask_empty(cs->effective_cpus)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 			pos_css = css_rightmost_descendant(pos_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 
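		/*
		 * Pin @cs so it cannot be freed while the RCU read lock is
		 * dropped around update_tasks_root_domain() below.
		 */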
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 		css_get(&cs->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 		update_tasks_root_domain(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		css_put(&cs->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) partition_and_rebuild_sched_domains(int ndoms_new, cpumask_var_t doms_new[],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 				    struct sched_domain_attr *dattr_new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	mutex_lock(&sched_domains_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	partition_sched_domains_locked(ndoms_new, doms_new, dattr_new);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	rebuild_root_domains();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	mutex_unlock(&sched_domains_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987)  * Rebuild scheduler domains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989)  * If the flag 'sched_load_balance' of any cpuset with non-empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990)  * 'cpus' changes, or if the 'cpus' allowed changes in any cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991)  * which has that flag enabled, or if any cpuset with a non-empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992)  * 'cpus' is removed, then call this routine to rebuild the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993)  * scheduler's dynamic sched domains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995)  * Call with cpuset_mutex held.  Takes get_online_cpus().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) static void rebuild_sched_domains_locked(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	struct sched_domain_attr *attr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	cpumask_var_t *doms;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	struct cpuset *cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 	int ndoms;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	lockdep_assert_held(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	 * If we have raced with CPU hotplug, return early to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	 * passing doms with offlined cpu to partition_sched_domains().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	 * Anyway, cpuset_hotplug_workfn() will rebuild sched domains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	 * With no CPUs in any subpartitions, top_cpuset's effective CPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	 * should be the same as the active CPUs, so checking only top_cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	 * is enough to detect racing CPU offlines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	if (!top_cpuset.nr_subparts_cpus &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	    !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	 * With subpartition CPUs, however, the effective CPUs of a partition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	 * root should be only a subset of the active CPUs.  Since a CPU in any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	 * partition root could be offlined, all must be checked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	if (top_cpuset.nr_subparts_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 			if (!is_partition_root(cs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 				pos_css = css_rightmost_descendant(pos_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 			if (!cpumask_subset(cs->effective_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 					    cpu_active_mask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 				rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	/* Generate domain masks and attrs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	ndoms = generate_sched_domains(&doms, &attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	/* Have scheduler rebuild the domains */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	partition_and_rebuild_sched_domains(ndoms, doms, attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) #else /* !CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) static void rebuild_sched_domains_locked(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) #endif /* CONFIG_SMP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) void rebuild_sched_domains(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 	get_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 	rebuild_sched_domains_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	put_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) static int update_cpus_allowed(struct cpuset *cs, struct task_struct *p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 				const struct cpumask *new_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	int ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 
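	/*
	 * Android restricted vendor hook: a registered handler may perform
	 * the update itself and set ret to 0, in which case the generic
	 * set_cpus_allowed_ptr() call below is skipped.
	 */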
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	trace_android_rvh_update_cpus_allowed(p, cs->cpus_requested, new_mask, &ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	return set_cpus_allowed_ptr(p, new_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)  * update_tasks_cpumask - Update the cpumasks of tasks in the cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076)  * @cs: the cpuset in which each task's cpus_allowed mask needs to be changed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078)  * Iterate through each task of @cs updating its cpus_allowed to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079)  * effective cpuset's.  As this function is called with cpuset_mutex held,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)  * cpuset membership stays stable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) static void update_tasks_cpumask(struct cpuset *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	struct css_task_iter it;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	css_task_iter_start(&cs->css, 0, &it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	while ((task = css_task_iter_next(&it)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 		update_cpus_allowed(cs, task, cs->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	css_task_iter_end(&it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)  * compute_effective_cpumask - Compute the effective cpumask of the cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)  * @new_cpus: the temp variable for the new effective_cpus mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)  * @cs: the cpuset that needs to recompute the new effective_cpus mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)  * @parent: the parent cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)  * If the parent has subpartition CPUs, include them in the list of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100)  * allowable CPUs in computing the new effective_cpus mask. Since offlined
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)  * CPUs are not removed from subparts_cpus, we have to use cpu_active_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102)  * to mask those out.
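 *
 * With subpartition CPUs, the result is effectively:
 *   new_cpus = (parent->effective_cpus | parent->subparts_cpus) &
 *              cs->cpus_requested & cpu_active_mask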
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) static void compute_effective_cpumask(struct cpumask *new_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 				      struct cpuset *cs, struct cpuset *parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	if (parent->nr_subparts_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 		cpumask_or(new_cpus, parent->effective_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 			   parent->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 		cpumask_and(new_cpus, new_cpus, cs->cpus_requested);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 		cpumask_and(new_cpus, new_cpus, cpu_active_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 		cpumask_and(new_cpus, cs->cpus_requested, parent_cs(cs)->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)  * Commands for update_parent_subparts_cpumask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) enum subparts_cmd {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	partcmd_enable,		/* Enable partition root	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	partcmd_disable,	/* Disable partition root	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	partcmd_update,		/* Update parent's subparts_cpus */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)  * update_parent_subparts_cpumask - update subparts_cpus mask of parent cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128)  * @cpuset:  The cpuset that requests change in partition root state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)  * @cmd:     Partition root state change command
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)  * @newmask: Optional new cpumask for partcmd_update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)  * @tmp:     Temporary addmask and delmask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)  * Return:   0, 1 or an error code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)  * For partcmd_enable, the cpuset is being transformed from a non-partition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)  * root to a partition root. The cpus_allowed mask of the given cpuset will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136)  * be put into parent's subparts_cpus and taken away from parent's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)  * effective_cpus. The function will return 0 if all the CPUs listed in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)  * cpus_allowed can be granted or an error code will be returned.
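 *
 * For example, enabling a partition root whose cpus_allowed is 2-3 under a
 * parent with effective_cpus 0-7 moves CPUs 2-3 into the parent's
 * subparts_cpus and shrinks the parent's effective_cpus to 0-1,4-7.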
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)  * For partcmd_disable, the cpuset is being transformed from a partition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141)  * root back to a non-partition root. Any CPUs in cpus_allowed that are in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142)  * parent's subparts_cpus will be taken away from that cpumask and put back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143)  * into parent's effective_cpus. 0 should always be returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)  * For partcmd_update, if the optional newmask is specified, the cpu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)  * list is to be changed from cpus_allowed to newmask. Otherwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)  * cpus_allowed is assumed to remain the same. The cpuset should either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)  * be a partition root or an invalid partition root. The partition root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149)  * state may change if newmask is NULL and none of the requested CPUs can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)  * be granted by the parent. The function will return 1 if changes to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151)  * parent's subparts_cpus and effective_cpus happen or 0 otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152)  * Error code should only be returned when newmask is non-NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)  * The partcmd_enable and partcmd_disable commands are used by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)  * update_prstate(). The partcmd_update command is used by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)  * update_cpumasks_hier() with newmask NULL and update_cpumask() with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157)  * newmask set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)  * The checking is more strict when enabling partition root than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)  * other two commands.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)  * Because of the implicit cpu exclusive nature of a partition root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)  * cpumask changes that violate the cpu exclusivity rule will not be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)  * permitted when checked by validate_change(). The validate_change()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)  * function will also prevent any changes to the cpu list if it is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)  * a superset of children's cpu lists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 					  struct cpumask *newmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 					  struct tmpmasks *tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	struct cpuset *parent = parent_cs(cpuset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	int adding;	/* Moving cpus from effective_cpus to subparts_cpus */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	int deleting;	/* Moving cpus from subparts_cpus to effective_cpus */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	int new_prs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	bool part_error = false;	/* Partition error? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	lockdep_assert_held(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	 * The parent must be a partition root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	 * The new cpumask, if present, or the current cpus_allowed must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	 * not be empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	if (!is_partition_root(parent) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	   (newmask && cpumask_empty(newmask)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	   (!newmask && cpumask_empty(cpuset->cpus_allowed)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	 * Enabling/disabling partition root is not allowed if there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	 * online children.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	if ((cmd != partcmd_update) && css_has_online_children(&cpuset->css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 		return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	 * Enabling partition root is not allowed unless all the CPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	 * can be granted from parent's effective_cpus and at least one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	 * CPU is left in the parent after that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	if ((cmd == partcmd_enable) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	   (!cpumask_subset(cpuset->cpus_allowed, parent->effective_cpus) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	     cpumask_equal(cpuset->cpus_allowed, parent->effective_cpus)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	 * A cpumask update cannot make parent's effective_cpus become empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	adding = deleting = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	new_prs = cpuset->partition_root_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	if (cmd == partcmd_enable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		cpumask_copy(tmp->addmask, cpuset->cpus_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 		adding = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	} else if (cmd == partcmd_disable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 		deleting = cpumask_and(tmp->delmask, cpuset->cpus_allowed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 				       parent->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	} else if (newmask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		 * partcmd_update with newmask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 		 * delmask = cpus_allowed & ~newmask & parent->subparts_cpus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 		 * addmask = newmask & parent->effective_cpus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 		 *		     & ~parent->subparts_cpus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		cpumask_andnot(tmp->delmask, cpuset->cpus_allowed, newmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		deleting = cpumask_and(tmp->delmask, tmp->delmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 				       parent->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		cpumask_and(tmp->addmask, newmask, parent->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 		adding = cpumask_andnot(tmp->addmask, tmp->addmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 					parent->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 		 * Return error if the new effective_cpus could become empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 		if (adding &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 		    cpumask_equal(parent->effective_cpus, tmp->addmask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 			if (!deleting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 				return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 			 * As some of the CPUs in subparts_cpus might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 			 * been offlined, we need to compute the real delmask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 			 * to confirm that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 			if (!cpumask_and(tmp->addmask, tmp->delmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 					 cpu_active_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 				return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 			cpumask_copy(tmp->addmask, parent->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 		 * partcmd_update w/o newmask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 		 * addmask = cpus_allowed & parent->effective_cpus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 		 * Note that parent's subparts_cpus may have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 		 * pre-shrunk in case there is a change in the cpu list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 		 * So no deletion is needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		adding = cpumask_and(tmp->addmask, cpuset->cpus_allowed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 				     parent->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 		part_error = cpumask_equal(tmp->addmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 					   parent->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	if (cmd == partcmd_update) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 		int prev_prs = cpuset->partition_root_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		 * Check for possible transition between PRS_ENABLED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 		 * and PRS_ERROR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 		switch (cpuset->partition_root_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		case PRS_ENABLED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 			if (part_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 				new_prs = PRS_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 		case PRS_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 			if (!part_error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 				new_prs = PRS_ENABLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 		 * Set part_error if previously in invalid state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 		part_error = (prev_prs == PRS_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	if (!part_error && (new_prs == PRS_ERROR))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 		return 0;	/* Nothing needs to be done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 	if (new_prs == PRS_ERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 		 * Remove all its cpus from parent's subparts_cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 		adding = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 		deleting = cpumask_and(tmp->delmask, cpuset->cpus_allowed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 				       parent->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	if (!adding && !deleting && (new_prs == cpuset->partition_root_state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	 * Change the parent's subparts_cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	 * Newly added CPUs will be removed from effective_cpus and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	 * newly deleted ones will be added back to effective_cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	if (adding) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 		cpumask_or(parent->subparts_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 			   parent->subparts_cpus, tmp->addmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 		cpumask_andnot(parent->effective_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 			       parent->effective_cpus, tmp->addmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	if (deleting) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 		cpumask_andnot(parent->subparts_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 			       parent->subparts_cpus, tmp->delmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 		 * Some of the CPUs in subparts_cpus might have been offlined.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 		cpumask_and(tmp->delmask, tmp->delmask, cpu_active_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 		cpumask_or(parent->effective_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 			   parent->effective_cpus, tmp->delmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	parent->nr_subparts_cpus = cpumask_weight(parent->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	if (cpuset->partition_root_state != new_prs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 		cpuset->partition_root_state = new_prs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	return cmd == partcmd_update;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)  * update_cpumasks_hier - Update effective cpumasks and tasks in the subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)  * @cs:  the cpuset to consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)  * @tmp: temp variables for calculating effective_cpus & partition setup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341)  * When the configured cpumask is changed, the effective cpumasks of this cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342)  * and all its descendants need to be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)  * On legacy hierarchy, effective_cpus will be the same as cpus_allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)  * Called with cpuset_mutex held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	struct cpuset *cp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	bool need_rebuild_sched_domains = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	int new_prs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 		struct cpuset *parent = parent_cs(cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 		compute_effective_cpumask(tmp->new_cpus, cp, parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 		 * If it becomes empty, inherit the effective mask of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 		 * parent, which is guaranteed to have some CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 		if (is_in_v2_mode() && cpumask_empty(tmp->new_cpus)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 			cpumask_copy(tmp->new_cpus, parent->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 			if (!cp->use_parent_ecpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 				cp->use_parent_ecpus = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 				parent->child_ecpus_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 		} else if (cp->use_parent_ecpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 			cp->use_parent_ecpus = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 			WARN_ON_ONCE(!parent->child_ecpus_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 			parent->child_ecpus_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 		 * Skip the whole subtree if the cpumask remains the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 		 * and has no partition root state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 		if (!cp->partition_root_state &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 		    cpumask_equal(tmp->new_cpus, cp->effective_cpus)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 			pos_css = css_rightmost_descendant(pos_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 		 * update_parent_subparts_cpumask() should have been called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 		 * for cs already in update_cpumask(). We should also call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 		 * update_tasks_cpumask() again for tasks in the parent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 		 * cpuset if the parent's subparts_cpus changes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 		new_prs = cp->partition_root_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 		if ((cp != cs) && new_prs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 			switch (parent->partition_root_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 			case PRS_DISABLED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 				 * If parent is not a partition root or an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 				 * invalid partition root, clear its state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 				 * and its CS_CPU_EXCLUSIVE flag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 				WARN_ON_ONCE(cp->partition_root_state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 					     != PRS_ERROR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 				new_prs = PRS_DISABLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 				 * clear_bit() is an atomic operation and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 				 * readers aren't interested in the state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 				 * of CS_CPU_EXCLUSIVE anyway. So we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 				 * just update the flag without holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 				 * the callback_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 				clear_bit(CS_CPU_EXCLUSIVE, &cp->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 			case PRS_ENABLED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 				if (update_parent_subparts_cpumask(cp, partcmd_update, NULL, tmp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 					update_tasks_cpumask(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 			case PRS_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 				 * When the parent is invalid, this cpuset has to be invalid too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 				new_prs = PRS_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 		if (!css_tryget_online(&cp->css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 		cpumask_copy(cp->effective_cpus, tmp->new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 		if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 			cp->nr_subparts_cpus = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 			cpumask_clear(cp->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 		} else if (cp->nr_subparts_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 			 * Make sure that effective_cpus & subparts_cpus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 			 * are mutually exclusive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 			 * In the unlikely event that effective_cpus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 			 * becomes empty, we clear cp->nr_subparts_cpus and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 			 * let its child partition roots compete for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 			 * CPUs again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 			cpumask_andnot(cp->effective_cpus, cp->effective_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 				       cp->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 			if (cpumask_empty(cp->effective_cpus)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 				cpumask_copy(cp->effective_cpus, tmp->new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 				cpumask_clear(cp->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 				cp->nr_subparts_cpus = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 			} else if (!cpumask_subset(cp->subparts_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 						   tmp->new_cpus)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 				cpumask_andnot(cp->subparts_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 					cp->subparts_cpus, tmp->new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 				cp->nr_subparts_cpus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 					= cpumask_weight(cp->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 		if (new_prs != cp->partition_root_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 			cp->partition_root_state = new_prs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 		spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 		WARN_ON(!is_in_v2_mode() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 			!cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 		update_tasks_cpumask(cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 		 * On legacy hierarchy, if the effective cpumask of any non-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 		 * empty cpuset is changed, we need to rebuild sched domains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 		 * On default hierarchy, the cpuset needs to be a partition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 		 * root as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 		if (!cpumask_empty(cp->cpus_allowed) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 		    is_sched_load_balance(cp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 		   (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 		    is_partition_root(cp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 			need_rebuild_sched_domains = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 		css_put(&cp->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 	if (need_rebuild_sched_domains)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 		rebuild_sched_domains_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)  * update_sibling_cpumasks - Update siblings cpumasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498)  * @parent:  Parent cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499)  * @cs:      Current cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500)  * @tmp:     Temp variables
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) static void update_sibling_cpumasks(struct cpuset *parent, struct cpuset *cs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 				    struct tmpmasks *tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	struct cpuset *sibling;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	lockdep_assert_held(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	 * Check all its siblings and call update_cpumasks_hier()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	 * if their use_parent_ecpus flag is set in order for them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 	 * to use the right effective_cpus value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	 * The update_cpumasks_hier() function may sleep. So we have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 	 * release the RCU read lock before calling it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	cpuset_for_each_child(sibling, pos_css, parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 		if (sibling == cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 		if (!sibling->use_parent_ecpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 		if (!css_tryget_online(&sibling->css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 		update_cpumasks_hier(sibling, tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 		css_put(&sibling->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)  * update_cpumask - update the cpus_allowed mask of a cpuset and all tasks in it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)  * @cs: the cpuset to consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)  * @trialcs: trial cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539)  * @buf: buffer of cpu numbers written to this cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 			  const char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 	int retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 	struct tmpmasks tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 	/* top_cpuset.cpus_allowed tracks cpu_online_mask; it's read-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 	if (cs == &top_cpuset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 		return -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 	 * An empty cpus_requested is ok only if the cpuset has no tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 	 * Since cpulist_parse() fails on an empty mask, we special case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 	 * that parsing.  The validate_change() call ensures that cpusets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 	 * with tasks have cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 	if (!*buf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 		cpumask_clear(trialcs->cpus_requested);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 		retval = cpulist_parse(buf, trialcs->cpus_requested);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		if (retval < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 			return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 	if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 	cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	/* Nothing to do if the cpus didn't change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 	if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 	retval = validate_change(cs, trialcs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 	if (retval < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 		return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) #ifdef CONFIG_CPUMASK_OFFSTACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 	 * Use the cpumasks in trialcs for tmpmasks when they are pointers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	 * to allocated cpumasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	tmp.addmask  = trialcs->subparts_cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	tmp.delmask  = trialcs->effective_cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	tmp.new_cpus = trialcs->cpus_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	if (cs->partition_root_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 		/* Cpumask of a partition root cannot be empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 		if (cpumask_empty(trialcs->cpus_allowed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 		if (update_parent_subparts_cpumask(cs, partcmd_update,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 					trialcs->cpus_allowed, &tmp) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 	cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 	cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 	 * Make sure that subparts_cpus is a subset of cpus_allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	if (cs->nr_subparts_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 		cpumask_and(cs->subparts_cpus, cs->subparts_cpus, cs->cpus_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 		cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 	update_cpumasks_hier(cs, &tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	if (cs->partition_root_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 		struct cpuset *parent = parent_cs(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 		 * For partition root, update the cpumasks of sibling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 		 * cpusets if they use parent's effective_cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 		if (parent->child_ecpus_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 			update_sibling_cpumasks(parent, cs, &tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) }
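
/*
 * Illustrative usage (a sketch, not taken from this file): update_cpumask()
 * is reached through the write handler for the FILE_CPULIST control file
 * (see the file-type enum near the end of this listing), i.e. when userspace
 * writes a CPU list to the cgroup's "cpuset.cpus" file, roughly:
 *
 *	# echo 0-3,6 > /sys/fs/cgroup/cpuset/mygrp/cpuset.cpus
 *
 * "mygrp" and the v1 mount point are made-up examples; cpulist_parse()
 * above accepts the usual comma-separated ranges shown here.
 */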
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626)  * Migrate memory region from one set of nodes to another.  This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)  * performed asynchronously as it can be called from process migration path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628)  * holding locks involved in process management.  All mm migrations are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)  * performed in the queued order and can be waited for by flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)  * cpuset_migrate_mm_wq.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) struct cpuset_migrate_mm_work {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 	struct work_struct	work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	struct mm_struct	*mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 	nodemask_t		from;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 	nodemask_t		to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) static void cpuset_migrate_mm_workfn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 	struct cpuset_migrate_mm_work *mwork =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 		container_of(work, struct cpuset_migrate_mm_work, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 	/* on a wq worker, no need to worry about %current's mems_allowed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	do_migrate_pages(mwork->mm, &mwork->from, &mwork->to, MPOL_MF_MOVE_ALL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 	mmput(mwork->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 	kfree(mwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 							const nodemask_t *to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 	struct cpuset_migrate_mm_work *mwork;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 	mwork = kzalloc(sizeof(*mwork), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 	if (mwork) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 		mwork->mm = mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 		mwork->from = *from;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 		mwork->to = *to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 		INIT_WORK(&mwork->work, cpuset_migrate_mm_workfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 		queue_work(cpuset_migrate_mm_wq, &mwork->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 		mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) static void cpuset_post_attach(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 	flush_workqueue(cpuset_migrate_mm_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674)  * cpuset_change_task_nodemask - change task's mems_allowed and mempolicy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675)  * @tsk: the task to change
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676)  * @newmems: new nodes that the task will be set to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678)  * We use the mems_allowed_seq seqlock to safely update both tsk->mems_allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679)  * and rebind the task's mempolicy, if any. If the task is allocating in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680)  * parallel, it might temporarily see an empty intersection, which results in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)  * a seqlock check and retry before OOM or allocation failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) static void cpuset_change_task_nodemask(struct task_struct *tsk,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 					nodemask_t *newmems)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 	task_lock(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 	local_irq_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 	write_seqcount_begin(&tsk->mems_allowed_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 	mpol_rebind_task(tsk, newmems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 	tsk->mems_allowed = *newmems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 	write_seqcount_end(&tsk->mems_allowed_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 	local_irq_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	task_unlock(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) }
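
/*
 * Reader-side sketch (illustrative, relying on the read_mems_allowed_*()
 * helpers declared in <linux/cpuset.h>): allocators that race with the
 * update above typically sample the seqcount and retry, roughly:
 *
 *	unsigned int seq;
 *
 *	do {
 *		seq = read_mems_allowed_begin();
 *		...allocate using current->mems_allowed...
 *	} while (read_mems_allowed_retry(seq));
 *
 * so a transiently empty intersection leads to a retry rather than a
 * spurious allocation failure or OOM, as described above.
 */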
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) static void *cpuset_being_rebound;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704)  * update_tasks_nodemask - Update the nodemasks of tasks in the cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705)  * @cs: the cpuset in which each task's mems_allowed mask needs to be changed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707)  * Iterate through each task of @cs updating its mems_allowed to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708)  * effective cpuset's.  As this function is called with cpuset_mutex held,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)  * cpuset membership stays stable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) static void update_tasks_nodemask(struct cpuset *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 	static nodemask_t newmems;	/* protected by cpuset_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 	struct css_task_iter it;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	cpuset_being_rebound = cs;		/* causes mpol_dup() rebind */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	guarantee_online_mems(cs, &newmems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	 * The mpol_rebind_mm() call takes mmap_lock, which we couldn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	 * take while holding tasklist_lock.  Forks can happen - the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	 * mpol_dup() cpuset_being_rebound check will catch such forks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	 * and rebind their vma mempolicies too.  Because we still hold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 	 * the global cpuset_mutex, we know that no other rebind effort
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	 * will be contending for the global variable cpuset_being_rebound.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 	 * It's ok if we rebind the same mm twice; mpol_rebind_mm()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 	 * is idempotent.  Also migrate pages in each mm to new nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	css_task_iter_start(&cs->css, 0, &it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	while ((task = css_task_iter_next(&it))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 		struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 		bool migrate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 		cpuset_change_task_nodemask(task, &newmems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 		mm = get_task_mm(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 		if (!mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 		migrate = is_memory_migrate(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 		mpol_rebind_mm(mm, &cs->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 		if (migrate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 			cpuset_migrate_mm(mm, &cs->old_mems_allowed, &newmems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 			mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	css_task_iter_end(&it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	 * All the tasks' nodemasks have been updated, update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	 * cs->old_mems_allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 	cs->old_mems_allowed = newmems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	/* We're done rebinding vmas to this cpuset's new mems_allowed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 	cpuset_being_rebound = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763)  * update_nodemasks_hier - Update effective nodemasks and tasks in the subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)  * @cs: the cpuset to consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)  * @new_mems: a temp variable for calculating new effective_mems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767)  * When configured nodemask is changed, the effective nodemasks of this cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768)  * and all its descendants need to be updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)  * On legacy hierarchy, effective_mems will be the same as mems_allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)  * Called with cpuset_mutex held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	struct cpuset *cp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	cpuset_for_each_descendant_pre(cp, pos_css, cs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 		struct cpuset *parent = parent_cs(cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 		nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 		 * If it becomes empty, inherit the effective mask of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 		 * parent, which is guaranteed to have some MEMs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 		if (is_in_v2_mode() && nodes_empty(*new_mems))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 			*new_mems = parent->effective_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 		/* Skip the whole subtree if the nodemask remains the same. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 		if (nodes_equal(*new_mems, cp->effective_mems)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 			pos_css = css_rightmost_descendant(pos_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 		if (!css_tryget_online(&cp->css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 		spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 		cp->effective_mems = *new_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 		spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 		WARN_ON(!is_in_v2_mode() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 			!nodes_equal(cp->mems_allowed, cp->effective_mems));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 		update_tasks_nodemask(cp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 		css_put(&cp->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818)  * Handle user request to change the 'mems' memory placement
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819)  * of a cpuset.  Needs to validate the request, update the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820)  * cpuset's mems_allowed, and for each task in the cpuset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821)  * update mems_allowed and rebind the task's mempolicy and any vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822)  * mempolicies, and if the cpuset is marked 'memory_migrate',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823)  * migrate the task's pages to the new memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825)  * Call with cpuset_mutex held. May take callback_lock during call.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826)  * Will take tasklist_lock, scan tasklist for tasks in cpuset cs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827)  * lock each such task's mm->mmap_lock, scan its vmas and rebind
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828)  * their mempolicies to the cpuset's new mems_allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 			   const char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 	int retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 	 * top_cpuset.mems_allowed tracks node_states[N_MEMORY];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	 * it's read-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 	if (cs == &top_cpuset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 		retval = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 	 * An empty mems_allowed is ok iff there are no tasks in the cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	 * Since nodelist_parse() fails on an empty mask, we special case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 	 * that parsing.  The validate_change() call ensures that cpusets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	 * with tasks have memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 	if (!*buf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 		nodes_clear(trialcs->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 		retval = nodelist_parse(buf, trialcs->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 		if (retval < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 		if (!nodes_subset(trialcs->mems_allowed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 				  top_cpuset.mems_allowed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 			retval = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	if (nodes_equal(cs->mems_allowed, trialcs->mems_allowed)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 		retval = 0;		/* Too easy - nothing to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 	retval = validate_change(cs, trialcs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 	if (retval < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	cs->mems_allowed = trialcs->mems_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 	/* use trialcs->mems_allowed as a temp variable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 	update_nodemasks_hier(cs, &trialcs->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 	return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) bool current_cpuset_is_being_rebound(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 	bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 	ret = task_cs(current) == cpuset_being_rebound;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) static int update_relax_domain_level(struct cpuset *cs, s64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 	if (val < -1 || val >= sched_domain_level_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 	if (val != cs->relax_domain_level) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 		cs->relax_domain_level = val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 		if (!cpumask_empty(cs->cpus_allowed) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 		    is_sched_load_balance(cs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 			rebuild_sched_domains_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) }
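
/*
 * For reference (summarizing the cpuset documentation, not this file): the
 * value written through the sched_relax_domain_level control file is -1 for
 * "no request, use the system default", 0 to disable the idle-CPU search
 * during wakeup and fork/exec balancing, and increasing positive values to
 * widen the search scope (SMT siblings, cores in a package, CPUs in a node,
 * and so on), capped by sched_domain_level_max as checked above.
 */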
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911)  * update_tasks_flags - update the spread flags of tasks in the cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912)  * @cs: the cpuset in which each task's spread flags needs to be changed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914)  * Iterate through each task of @cs updating its spread flags.  As this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915)  * function is called with cpuset_mutex held, cpuset membership stays
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916)  * stable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) static void update_tasks_flags(struct cpuset *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 	struct css_task_iter it;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 	css_task_iter_start(&cs->css, 0, &it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 	while ((task = css_task_iter_next(&it)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 		cpuset_update_task_spread_flag(cs, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 	css_task_iter_end(&it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930)  * update_flag - read a 0 or a 1 in a file and update associated flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931)  * bit:		the bit to update (see cpuset_flagbits_t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932)  * cs:		the cpuset to update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933)  * turning_on: 	whether the flag is being set or cleared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935)  * Call with cpuset_mutex held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 		       int turning_on)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	struct cpuset *trialcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 	int balance_flag_changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 	int spread_flag_changed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 	trialcs = alloc_trial_cpuset(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 	if (!trialcs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 	if (turning_on)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 		set_bit(bit, &trialcs->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 		clear_bit(bit, &trialcs->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	err = validate_change(cs, trialcs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 	balance_flag_changed = (is_sched_load_balance(cs) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 				is_sched_load_balance(trialcs));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 	spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 			|| (is_spread_page(cs) != is_spread_page(trialcs)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 	cs->flags = trialcs->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 	if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 		rebuild_sched_domains_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 	if (spread_flag_changed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 		update_tasks_flags(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	free_cpuset(trialcs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980)  * update_prstate - update partition_root_state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981)  * cs: the cpuset to update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982)  * new_prs: new partition root state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)  * Call with cpuset_mutex held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) static int update_prstate(struct cpuset *cs, int new_prs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	int err, old_prs = cs->partition_root_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	struct cpuset *parent = parent_cs(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	struct tmpmasks tmpmask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 	if (old_prs == new_prs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 	 * Cannot force a partial or invalid partition root to a full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 	 * partition root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 	if (new_prs && (old_prs == PRS_ERROR))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 	if (alloc_cpumasks(NULL, &tmpmask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 	if (!old_prs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 		 * Turning on partition root requires setting the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 		 * CS_CPU_EXCLUSIVE bit implicitly as well, and cpus_allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 		 * cannot be empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 		if (cpumask_empty(cs->cpus_allowed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 		err = update_flag(CS_CPU_EXCLUSIVE, cs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 		err = update_parent_subparts_cpumask(cs, partcmd_enable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 						     NULL, &tmpmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 			update_flag(CS_CPU_EXCLUSIVE, cs, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 		 * Turning off partition root will clear the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 		 * CS_CPU_EXCLUSIVE bit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 		if (old_prs == PRS_ERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 			update_flag(CS_CPU_EXCLUSIVE, cs, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 			err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 		err = update_parent_subparts_cpumask(cs, partcmd_disable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 						     NULL, &tmpmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 		/* Turning off CS_CPU_EXCLUSIVE will not return an error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 		update_flag(CS_CPU_EXCLUSIVE, cs, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 	 * Update cpumask of parent's tasks except when it is the top
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 	 * cpuset as some system daemons cannot be mapped to other CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 	if (parent != &top_cpuset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 		update_tasks_cpumask(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	if (parent->child_ecpus_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 		update_sibling_cpumasks(parent, cs, &tmpmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 	rebuild_sched_domains_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 	if (!err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 		spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 		cs->partition_root_state = new_prs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 		spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	free_cpumasks(NULL, &tmpmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) }
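
/*
 * Illustrative usage (a sketch, not taken from this file): on the default
 * (v2) hierarchy update_prstate() is typically driven by writes to the
 * "cpuset.cpus.partition" control file, roughly:
 *
 *	# echo root   > cpuset.cpus.partition	(request a partition root)
 *	# echo member > cpuset.cpus.partition	(back to a normal member)
 *
 * with PRS_ERROR only ever set internally, when a previously valid
 * partition root can no longer satisfy its constraints.
 */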
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)  * Frequency meter - How fast is some event occurring?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070)  * These routines manage a digitally filtered, constant time based,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071)  * event frequency meter.  There are four routines:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)  *   fmeter_init() - initialize a frequency meter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)  *   fmeter_markevent() - called each time the event happens.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074)  *   fmeter_getrate() - returns the recent rate of such events.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075)  *   fmeter_update() - internal routine used to update fmeter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077)  * A common data structure is passed to each of these routines,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078)  * which is used to keep track of the state required to manage the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079)  * frequency meter and its digital filter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081)  * The filter works on the number of events marked per unit time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082)  * The filter is single-pole low-pass recursive (IIR).  The time unit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083)  * is 1 second.  Arithmetic is done using 32-bit integers scaled to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084)  * simulate 3 decimal digits of precision (multiplied by 1000).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086)  * With an FM_COEF of 933, and a time base of 1 second, the filter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087)  * has a half-life of 10 seconds, meaning that if the events quit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088)  * happening, then the rate returned from fmeter_getrate()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089)  * will be cut in half every 10 seconds, until it converges to zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091)  * It is not worth doing a real infinitely recursive filter.  If more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092)  * than FM_MAXTICKS ticks have elapsed since the last filter event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093)  * just compute FM_MAXTICKS ticks worth, by which point the level
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094)  * will be stable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096)  * Limit the count of unprocessed events to FM_MAXCNT, so as to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097)  * arithmetic overflow in the fmeter_update() routine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099)  * Given the simple 32 bit integer arithmetic used, this meter works
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100)  * best for reporting rates between one per millisecond (msec) and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)  * one per 32 (approx) seconds.  At constant rates faster than one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102)  * per msec it maxes out at values just under 1,000,000.  At constant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103)  * rates between one per msec, and one per second it will stabilize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104)  * to a value N*1000, where N is the rate of events per second.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105)  * At constant rates between one per second and one per 32 seconds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)  * it will be choppy, moving up on the seconds that have an event,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107)  * and then decaying until the next event.  At rates slower than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108)  * about one in 32 seconds, it decays all the way back to zero between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)  * each event.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) #define FM_COEF 933		/* coefficient for half-life of 10 secs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) #define FM_MAXTICKS ((u32)99)   /* useless computing more ticks than this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) #define FM_MAXCNT 1000000	/* limit cnt to avoid overflow */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) #define FM_SCALE 1000		/* faux fixed point scale */
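
/*
 * Worked example using the constants above: each elapsed second multiplies
 * the stored value by FM_COEF/FM_SCALE = 0.933, and 0.933^10 ~= 0.50, which
 * is the 10 second half-life quoted in the big comment.  At a steady rate
 * of N events per second the value converges to the fixed point of
 *
 *	val = (FM_COEF * val + (FM_SCALE - FM_COEF) * N * FM_SCALE) / FM_SCALE
 *
 * which solves to val = N * FM_SCALE = N * 1000, matching the "stabilize
 * to a value N*1000" behaviour described above.
 */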
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) /* Initialize a frequency meter */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) static void fmeter_init(struct fmeter *fmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 	fmp->cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 	fmp->val = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	fmp->time = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 	spin_lock_init(&fmp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) /* Internal meter update - process cnt events and update value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) static void fmeter_update(struct fmeter *fmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	time64_t now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 	u32 ticks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 	now = ktime_get_seconds();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 	ticks = now - fmp->time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 	if (ticks == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 	ticks = min(FM_MAXTICKS, ticks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 	while (ticks-- > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 		fmp->val = (FM_COEF * fmp->val) / FM_SCALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 	fmp->time = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 	fmp->val += ((FM_SCALE - FM_COEF) * fmp->cnt) / FM_SCALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 	fmp->cnt = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) /* Process any previous ticks, then bump cnt by one (times scale). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) static void fmeter_markevent(struct fmeter *fmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 	spin_lock(&fmp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 	fmeter_update(fmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 	fmp->cnt = min(FM_MAXCNT, fmp->cnt + FM_SCALE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 	spin_unlock(&fmp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) /* Process any previous ticks, then return current value. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) static int fmeter_getrate(struct fmeter *fmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 	int val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 	spin_lock(&fmp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 	fmeter_update(fmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 	val = fmp->val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 	spin_unlock(&fmp->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 	return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) }
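
/*
 * Usage note (referring to code later in this file, roughly speaking): the
 * fmeter backs the per-cpuset "memory_pressure" statistic; the reclaim path
 * bumps it via cpuset_memory_pressure_bump()/fmeter_markevent() when tasks
 * in the cpuset enter direct page reclaim, and reading the
 * cpuset.memory_pressure file reports fmeter_getrate().
 */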
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) static struct cpuset *cpuset_attach_old_cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) /* Called by cgroups to determine if a cpuset is usable; cpuset_mutex held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) static int cpuset_can_attach(struct cgroup_taskset *tset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 	struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 	struct cpuset *cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 	/* used later by cpuset_attach() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 	cpuset_attach_old_cs = task_cs(cgroup_taskset_first(tset, &css));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 	cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 	/* allow moving tasks into an empty cpuset if on default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 	ret = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 	if (!is_in_v2_mode() &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 	    (cpumask_empty(cs->cpus_allowed) || nodes_empty(cs->mems_allowed)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 	cgroup_taskset_for_each(task, css, tset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 		ret = task_can_attach(task, cs->cpus_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 			goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 		ret = security_task_setscheduler(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 			goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 	 * Mark attach is in progress.  This makes validate_change() fail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 	 * changes which zero cpus/mems_allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 	cs->attach_in_progress++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) static void cpuset_cancel_attach(struct cgroup_taskset *tset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 	struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 	cgroup_taskset_first(tset, &css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 	css_cs(css)->attach_in_progress--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222)  * Protected by cpuset_mutex.  cpus_attach is used only by cpuset_attach()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223)  * but we can't allocate it dynamically there.  Define it global and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224)  * allocate from cpuset_init().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) static cpumask_var_t cpus_attach;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) static void cpuset_attach(struct cgroup_taskset *tset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) 	/* static buf protected by cpuset_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) 	static nodemask_t cpuset_attach_nodemask_to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 	struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	struct task_struct *leader;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 	struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 	struct cpuset *cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 	struct cpuset *oldcs = cpuset_attach_old_cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 	cgroup_taskset_first(tset, &css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 	cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 	cpus_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 	guarantee_online_mems(cs, &cpuset_attach_nodemask_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 	cgroup_taskset_for_each(task, css, tset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 		if (cs != &top_cpuset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 			guarantee_online_cpus(task, cpus_attach);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 			cpumask_copy(cpus_attach, task_cpu_possible_mask(task));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 		 * can_attach beforehand should guarantee that this doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 		 * fail.  TODO: have a better way to handle failure here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 		WARN_ON_ONCE(update_cpus_allowed(cs, task, cpus_attach));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 		cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 		cpuset_update_task_spread_flag(cs, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 	 * Change mm for all threadgroup leaders. This is expensive and may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 	 * sleep, and should be moved outside the migration path proper.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 	cpuset_attach_nodemask_to = cs->effective_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 	cgroup_taskset_for_each_leader(leader, css, tset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 		struct mm_struct *mm = get_task_mm(leader);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 		if (mm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 			mpol_rebind_mm(mm, &cpuset_attach_nodemask_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 			 * old_mems_allowed is the same as mems_allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 			 * here, except when this task is being moved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 			 * automatically due to hotplug.  In that case
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 			 * @mems_allowed has been updated and is empty, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 			 * @old_mems_allowed is the right nodemask to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 			 * migrate the mm from.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 			if (is_memory_migrate(cs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 				cpuset_migrate_mm(mm, &oldcs->old_mems_allowed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 						  &cpuset_attach_nodemask_to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 				mmput(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 	cs->old_mems_allowed = cpuset_attach_nodemask_to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 	cs->attach_in_progress--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 	if (!cs->attach_in_progress)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 		wake_up(&cpuset_attach_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 	cpus_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) /* The various types of files and directories in a cpuset file system */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) typedef enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 	FILE_MEMORY_MIGRATE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	FILE_CPULIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	FILE_MEMLIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	FILE_EFFECTIVE_CPULIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	FILE_EFFECTIVE_MEMLIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 	FILE_SUBPARTS_CPULIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 	FILE_CPU_EXCLUSIVE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 	FILE_MEM_EXCLUSIVE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 	FILE_MEM_HARDWALL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 	FILE_SCHED_LOAD_BALANCE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 	FILE_PARTITION_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 	FILE_SCHED_RELAX_DOMAIN_LEVEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 	FILE_MEMORY_PRESSURE_ENABLED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 	FILE_MEMORY_PRESSURE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	FILE_SPREAD_PAGE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 	FILE_SPREAD_SLAB,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) } cpuset_filetype_t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 
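/*
 * Handler for writes to the boolean (u64) cpuset control files; the
 * cpuset_filetype_t value stashed in cft->private selects which flag of
 * the cpuset is updated.
 */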
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 			    u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 	struct cpuset *cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 	cpuset_filetype_t type = cft->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 	int retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 	get_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 	if (!is_cpuset_online(cs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 		retval = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 	case FILE_CPU_EXCLUSIVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 		retval = update_flag(CS_CPU_EXCLUSIVE, cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	case FILE_MEM_EXCLUSIVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 		retval = update_flag(CS_MEM_EXCLUSIVE, cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 	case FILE_MEM_HARDWALL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 		retval = update_flag(CS_MEM_HARDWALL, cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) 	case FILE_SCHED_LOAD_BALANCE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 		retval = update_flag(CS_SCHED_LOAD_BALANCE, cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 	case FILE_MEMORY_MIGRATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 		retval = update_flag(CS_MEMORY_MIGRATE, cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 	case FILE_MEMORY_PRESSURE_ENABLED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 		cpuset_memory_pressure_enabled = !!val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 	case FILE_SPREAD_PAGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 		retval = update_flag(CS_SPREAD_PAGE, cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 	case FILE_SPREAD_SLAB:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 		retval = update_flag(CS_SPREAD_SLAB, cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 		retval = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 	put_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 	return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 			    s64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 	struct cpuset *cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 	cpuset_filetype_t type = cft->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 	int retval = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 	get_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 	if (!is_cpuset_online(cs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 		retval = update_relax_domain_level(cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 		retval = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 	put_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 	return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395)  * Common handling for a write to a "cpus" or "mems" file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396)  */
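/*
 * For illustration only (the mount point and group name below are
 * assumptions, not part of this file), this handler is reached by
 * writes such as:
 *
 *	echo "0-3,6" > /sys/fs/cgroup/cpuset/mygrp/cpuset.cpus
 *	echo "0"     > /sys/fs/cgroup/cpuset/mygrp/cpuset.mems
 */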
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 				    char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 	struct cpuset *cs = css_cs(of_css(of));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 	struct cpuset *trialcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 	int retval = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 	buf = strstrip(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 	 * CPU or memory hotunplug may leave @cs w/o any execution
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 	 * resources, in which case the hotplug code asynchronously updates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 	 * configuration and transfers all tasks to the nearest ancestor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 	 * which can execute.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 	 * As writes to "cpus" or "mems" may restore @cs's execution
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 	 * resources, wait for the previously scheduled operations before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 	 * proceeding, so that we don't end up repeatedly removing tasks added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 	 * after execution capability is restored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 	 * cpuset_hotplug_work calls back into cgroup core via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 	 * cgroup_transfer_tasks() and waiting for it from a cgroupfs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 	 * operation like this one can lead to a deadlock through kernfs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 	 * active_ref protection.  Let's break the protection.  Losing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 	 * protection is okay as we check whether @cs is online after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 	 * grabbing cpuset_mutex anyway.  This only happens on the legacy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 	 * hierarchies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 	css_get(&cs->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 	kernfs_break_active_protection(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 	flush_work(&cpuset_hotplug_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 	get_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 	if (!is_cpuset_online(cs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 	trialcs = alloc_trial_cpuset(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 	if (!trialcs) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 		retval = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 	switch (of_cft(of)->private) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 	case FILE_CPULIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 		retval = update_cpumask(cs, trialcs, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 	case FILE_MEMLIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 		retval = update_nodemask(cs, trialcs, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 		retval = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 	free_cpuset(trialcs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 	put_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 	kernfs_unbreak_active_protection(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 	css_put(&cs->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 	flush_workqueue(cpuset_migrate_mm_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 	return retval ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463)  * These ASCII lists should be read in a single call, using a user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464)  * buffer large enough to hold the entire map.  If read in smaller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465)  * chunks, there is no guarantee of atomicity.  Since the display format
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466)  * used (a list of ranges of sequential numbers) is variable length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)  * and since these maps can change dynamically, partial reads made while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)  * a list is changing could return gibberish.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469)  */
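/*
 * A minimal userspace sketch of such a single-call read; the buffer size
 * is an assumption, merely picked large enough to hold the whole list:
 *
 *	char buf[4096];
 *	ssize_t n = read(fd, buf, sizeof(buf));
 */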
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) static int cpuset_common_seq_show(struct seq_file *sf, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 	struct cpuset *cs = css_cs(seq_css(sf));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 	cpuset_filetype_t type = seq_cft(sf)->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 	case FILE_CPULIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->cpus_requested));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 	case FILE_MEMLIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 		seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->mems_allowed));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) 	case FILE_EFFECTIVE_CPULIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->effective_cpus));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 	case FILE_EFFECTIVE_MEMLIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 		seq_printf(sf, "%*pbl\n", nodemask_pr_args(&cs->effective_mems));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 	case FILE_SUBPARTS_CPULIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) 		seq_printf(sf, "%*pbl\n", cpumask_pr_args(cs->subparts_cpus));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 		ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) static u64 cpuset_read_u64(struct cgroup_subsys_state *css, struct cftype *cft)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 	struct cpuset *cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 	cpuset_filetype_t type = cft->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 	case FILE_CPU_EXCLUSIVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) 		return is_cpu_exclusive(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) 	case FILE_MEM_EXCLUSIVE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) 		return is_mem_exclusive(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	case FILE_MEM_HARDWALL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) 		return is_mem_hardwall(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 	case FILE_SCHED_LOAD_BALANCE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 		return is_sched_load_balance(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 	case FILE_MEMORY_MIGRATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 		return is_memory_migrate(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 	case FILE_MEMORY_PRESSURE_ENABLED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 		return cpuset_memory_pressure_enabled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 	case FILE_MEMORY_PRESSURE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 		return fmeter_getrate(&cs->fmeter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	case FILE_SPREAD_PAGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 		return is_spread_page(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 	case FILE_SPREAD_SLAB:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 		return is_spread_slab(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 	/* Unreachable but makes gcc happy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) static s64 cpuset_read_s64(struct cgroup_subsys_state *css, struct cftype *cft)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 	struct cpuset *cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 	cpuset_filetype_t type = cft->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 		return cs->relax_domain_level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 	/* Unreachable but makes gcc happy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) static int sched_partition_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) 	struct cpuset *cs = css_cs(seq_css(seq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) 	switch (cs->partition_root_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) 	case PRS_ENABLED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) 		seq_puts(seq, "root\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) 	case PRS_DISABLED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) 		seq_puts(seq, "member\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 	case PRS_ERROR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) 		seq_puts(seq, "root invalid\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 				     size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 	struct cpuset *cs = css_cs(of_css(of));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) 	int val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 	int retval = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 	buf = strstrip(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 	 * Convert "root" to ENABLED, and convert "member" to DISABLED.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) 	 */
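	/*
	 * For illustration only (the cgroup v2 path is an assumption):
	 *	echo root   > /sys/fs/cgroup/mygrp/cpuset.cpus.partition
	 *	echo member > /sys/fs/cgroup/mygrp/cpuset.cpus.partition
	 */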
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) 	if (!strcmp(buf, "root"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) 		val = PRS_ENABLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) 	else if (!strcmp(buf, "member"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) 		val = PRS_DISABLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 	css_get(&cs->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 	get_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) 	if (!is_cpuset_online(cs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) 	retval = update_prstate(cs, val);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 	put_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) 	css_put(&cs->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 	return retval ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600)  * for the common functions, 'private' gives the type of file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) static struct cftype legacy_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 		.name = "cpus",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 		.write = cpuset_write_resmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 		.max_write_len = (100U + 6 * NR_CPUS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 		.private = FILE_CPULIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) 		.name = "mems",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) 		.write = cpuset_write_resmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 		.max_write_len = (100U + 6 * MAX_NUMNODES),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) 		.private = FILE_MEMLIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) 		.name = "effective_cpus",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) 		.private = FILE_EFFECTIVE_CPULIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) 		.name = "effective_mems",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 		.private = FILE_EFFECTIVE_MEMLIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 		.name = "cpu_exclusive",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 		.write_u64 = cpuset_write_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 		.private = FILE_CPU_EXCLUSIVE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) 		.name = "mem_exclusive",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 		.write_u64 = cpuset_write_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 		.private = FILE_MEM_EXCLUSIVE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 		.name = "mem_hardwall",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) 		.write_u64 = cpuset_write_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 		.private = FILE_MEM_HARDWALL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) 		.name = "sched_load_balance",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) 		.write_u64 = cpuset_write_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) 		.private = FILE_SCHED_LOAD_BALANCE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) 		.name = "sched_relax_domain_level",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) 		.read_s64 = cpuset_read_s64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) 		.write_s64 = cpuset_write_s64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) 		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) 		.name = "memory_migrate",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) 		.write_u64 = cpuset_write_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) 		.private = FILE_MEMORY_MIGRATE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 		.name = "memory_pressure",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 		.private = FILE_MEMORY_PRESSURE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 		.name = "memory_spread_page",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 		.write_u64 = cpuset_write_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) 		.private = FILE_SPREAD_PAGE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) 		.name = "memory_spread_slab",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) 		.write_u64 = cpuset_write_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) 		.private = FILE_SPREAD_SLAB,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) 		.name = "memory_pressure_enabled",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 		.flags = CFTYPE_ONLY_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) 		.read_u64 = cpuset_read_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 		.write_u64 = cpuset_write_u64,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) 		.private = FILE_MEMORY_PRESSURE_ENABLED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) 	{ }	/* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706)  * This is currently a minimal set for the default hierarchy. It can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707)  * expanded later on by migrating more features and control files from v1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708)  */
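/*
 * On the default hierarchy these entries appear with the controller name
 * prefixed, e.g. "cpuset.cpus", "cpuset.mems", "cpuset.cpus.effective",
 * "cpuset.mems.effective" and "cpuset.cpus.partition".
 */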
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) static struct cftype dfl_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 		.name = "cpus",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 		.write = cpuset_write_resmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 		.max_write_len = (100U + 6 * NR_CPUS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 		.private = FILE_CPULIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 		.flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) 		.name = "mems",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) 		.write = cpuset_write_resmask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 		.max_write_len = (100U + 6 * MAX_NUMNODES),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) 		.private = FILE_MEMLIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) 		.flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) 		.name = "cpus.effective",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) 		.private = FILE_EFFECTIVE_CPULIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) 		.name = "mems.effective",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) 		.private = FILE_EFFECTIVE_MEMLIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) 		.name = "cpus.partition",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) 		.seq_show = sched_partition_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) 		.write = sched_partition_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) 		.private = FILE_PARTITION_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) 		.flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 		.name = "cpus.subpartitions",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) 		.seq_show = cpuset_common_seq_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) 		.private = FILE_SUBPARTS_CPULIST,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) 		.flags = CFTYPE_DEBUG,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) 	{ }	/* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760)  *	cpuset_css_alloc - allocate a cpuset css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761)  *	parent_css:	css of the cpuset that will be the new cpuset's parent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) static struct cgroup_subsys_state *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) cpuset_css_alloc(struct cgroup_subsys_state *parent_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) 	struct cpuset *cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) 	if (!parent_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 		return &top_cpuset.css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) 	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 	if (!cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) 	if (alloc_cpumasks(cs, NULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) 		kfree(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) 	set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) 	nodes_clear(cs->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) 	nodes_clear(cs->effective_mems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) 	fmeter_init(&cs->fmeter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) 	cs->relax_domain_level = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 	return &cs->css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 
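/*
 * cpuset_css_online - bring a freshly allocated cpuset online.  The new
 * cpuset inherits the spread flags from its parent, picks up the parent's
 * effective masks when running in v2 mode, and clones the parent's full
 * configuration if CGRP_CPUSET_CLONE_CHILDREN is set on the cgroup.
 */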
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) static int cpuset_css_online(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 	struct cpuset *cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 	struct cpuset *parent = parent_cs(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 	struct cpuset *tmp_cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 	struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 	if (!parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 	get_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 	set_bit(CS_ONLINE, &cs->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 	if (is_spread_page(parent))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 		set_bit(CS_SPREAD_PAGE, &cs->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 	if (is_spread_slab(parent))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 		set_bit(CS_SPREAD_SLAB, &cs->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 	cpuset_inc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 	if (is_in_v2_mode()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 		cpumask_copy(cs->effective_cpus, parent->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 		cs->effective_mems = parent->effective_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 		cs->use_parent_ecpus = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 		parent->child_ecpus_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 	if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 	 * Clone @parent's configuration if CGRP_CPUSET_CLONE_CHILDREN is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 	 * set.  This flag handling is implemented in cgroup core for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 	 * historical reasons - the flag may be specified during mount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 	 * Currently, if any sibling cpusets have exclusive cpus or mem, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) 	 * refuse to clone the configuration - thereby refusing the task to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) 	 * be entered, and as a result refusing the sys_unshare() or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) 	 * clone() which initiated it.  If this becomes a problem for some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) 	 * users who wish to allow that scenario, then this could be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 	 * changed to grant parent->cpus_allowed-sibling_cpus_exclusive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 	 * (and likewise for mems) to the new cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) 	cpuset_for_each_child(tmp_cs, pos_css, parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) 		if (is_mem_exclusive(tmp_cs) || is_cpu_exclusive(tmp_cs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 			rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) 			goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	cs->mems_allowed = parent->mems_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 	cs->effective_mems = parent->mems_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 	cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 	cpumask_copy(cs->cpus_requested, parent->cpus_requested);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) 	cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 	put_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859)  * If the cpuset being removed has its flag 'sched_load_balance'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860)  * enabled, then simulate turning sched_load_balance off, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861)  * will call rebuild_sched_domains_locked(). That is not needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862)  * in the default hierarchy where only changes in partition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863)  * will cause repartitioning.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865)  * If the cpuset has the 'sched.partition' flag enabled, simulate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866)  * turning 'sched.partition' off.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) static void cpuset_css_offline(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) 	struct cpuset *cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) 	get_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) 	if (is_partition_root(cs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) 		update_prstate(cs, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) 	if (!cgroup_subsys_on_dfl(cpuset_cgrp_subsys) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) 	    is_sched_load_balance(cs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) 		update_flag(CS_SCHED_LOAD_BALANCE, cs, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) 	if (cs->use_parent_ecpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) 		struct cpuset *parent = parent_cs(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) 		cs->use_parent_ecpus = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) 		parent->child_ecpus_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) 	cpuset_dec();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) 	clear_bit(CS_ONLINE, &cs->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 	put_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) static void cpuset_css_free(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) 	struct cpuset *cs = css_cs(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 	free_cpuset(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 
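/*
 * cpuset_bind - called when the cpuset controller is bound to a hierarchy.
 * In v2 mode top_cpuset is reset to all possible CPUs and memory nodes;
 * on the legacy hierarchy it is reset to the current effective masks.
 */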
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) static void cpuset_bind(struct cgroup_subsys_state *root_css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 	if (is_in_v2_mode()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) 		cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 		top_cpuset.mems_allowed = node_possible_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 		cpumask_copy(top_cpuset.cpus_allowed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 			     top_cpuset.effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 		top_cpuset.mems_allowed = top_cpuset.effective_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923)  * Make sure the new task conforms to the current state of its parent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924)  * which could have been changed by cpuset just after it inherits the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925)  * state from the parent and before it sits on the cgroup's task list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) static void cpuset_fork(struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 	int inherit_cpus = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) 	if (task_css_is_root(task, cpuset_cgrp_id))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) 	trace_android_rvh_cpuset_fork(task, &inherit_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) 	if (!inherit_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) 		set_cpus_allowed_ptr(task, current->cpus_ptr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) 	task->mems_allowed = current->mems_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) struct cgroup_subsys cpuset_cgrp_subsys = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 	.css_alloc	= cpuset_css_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 	.css_online	= cpuset_css_online,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 	.css_offline	= cpuset_css_offline,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) 	.css_free	= cpuset_css_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) 	.can_attach	= cpuset_can_attach,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) 	.cancel_attach	= cpuset_cancel_attach,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) 	.attach		= cpuset_attach,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) 	.post_attach	= cpuset_post_attach,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) 	.bind		= cpuset_bind,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) 	.fork		= cpuset_fork,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 	.legacy_cftypes	= legacy_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) 	.dfl_cftypes	= dfl_files,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) 	.early_init	= true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) 	.threaded	= true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957)  * cpuset_init - initialize cpusets at system boot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959)  * Description: Initialize top_cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960)  **/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) int __init cpuset_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) 	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) 	BUG_ON(!alloc_cpumask_var(&top_cpuset.effective_cpus, GFP_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) 	BUG_ON(!zalloc_cpumask_var(&top_cpuset.subparts_cpus, GFP_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) 	BUG_ON(!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) 	cpumask_setall(top_cpuset.cpus_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) 	cpumask_setall(top_cpuset.cpus_requested);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) 	nodes_setall(top_cpuset.mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) 	cpumask_setall(top_cpuset.effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) 	nodes_setall(top_cpuset.effective_mems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) 	fmeter_init(&top_cpuset.fmeter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) 	set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) 	top_cpuset.relax_domain_level = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) 	BUG_ON(!alloc_cpumask_var(&cpus_attach, GFP_KERNEL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985)  * If CPU and/or memory hotplug handlers, below, unplug any CPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986)  * or memory nodes, we need to walk over the cpuset hierarchy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987)  * removing that CPU or node from all cpusets.  If this removes the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988)  * last CPU or node from a cpuset, then move the tasks in the empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989)  * cpuset to its next-highest non-empty parent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) 	struct cpuset *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) 	 * Find its next-highest non-empty parent (the top cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) 	 * always has online cpus, so it can't be empty).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) 	parent = parent_cs(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) 	while (cpumask_empty(parent->cpus_allowed) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) 			nodes_empty(parent->mems_allowed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) 		parent = parent_cs(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) 	if (cgroup_transfer_tasks(parent->css.cgroup, cs->css.cgroup)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) 		pr_err("cpuset: failed to transfer tasks out of empty cpuset ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) 		pr_cont_cgroup_name(cs->css.cgroup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) 		pr_cont("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) 
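/*
 * hotplug_update_tasks_legacy - propagate hotplug-induced mask changes on
 * the legacy hierarchy: both the configured and the effective masks are
 * overwritten, tasks are updated, and if the cpuset ends up empty its
 * tasks are moved to a non-empty ancestor.
 */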
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) hotplug_update_tasks_legacy(struct cpuset *cs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) 			    struct cpumask *new_cpus, nodemask_t *new_mems,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) 			    bool cpus_updated, bool mems_updated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) 	bool is_empty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) 	cpumask_copy(cs->cpus_allowed, new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) 	cpumask_copy(cs->effective_cpus, new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) 	cs->mems_allowed = *new_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) 	cs->effective_mems = *new_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) 	 * Don't call update_tasks_cpumask() if the cpuset becomes empty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) 	 * as the tasks will be migrated to an ancestor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) 	if (cpus_updated && !cpumask_empty(cs->cpus_allowed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) 		update_tasks_cpumask(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) 	if (mems_updated && !nodes_empty(cs->mems_allowed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) 		update_tasks_nodemask(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) 	is_empty = cpumask_empty(cs->cpus_allowed) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) 		   nodes_empty(cs->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) 	 * Move tasks to the nearest ancestor with execution resources.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) 	 * This is a full cgroup operation which will also call back into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) 	 * cpuset.  It should be done outside any lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) 	if (is_empty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) 		remove_tasks_in_empty_cpuset(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) 
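/*
 * hotplug_update_tasks - the default-hierarchy counterpart: only the
 * effective masks are updated, and an empty result falls back to the
 * parent's effective masks instead of emptying the cpuset.
 */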
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) hotplug_update_tasks(struct cpuset *cs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) 		     struct cpumask *new_cpus, nodemask_t *new_mems,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) 		     bool cpus_updated, bool mems_updated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) 	if (cpumask_empty(new_cpus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) 		cpumask_copy(new_cpus, parent_cs(cs)->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) 	if (nodes_empty(*new_mems))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) 		*new_mems = parent_cs(cs)->effective_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) 	spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) 	cpumask_copy(cs->effective_cpus, new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) 	cs->effective_mems = *new_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) 	spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) 	if (cpus_updated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) 		update_tasks_cpumask(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) 	if (mems_updated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) 		update_tasks_nodemask(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) 
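/*
 * Set via cpuset_force_rebuild() (e.g. on partition state changes) so that
 * cpuset_hotplug_workfn() rebuilds the sched domains even when the effective
 * cpu list itself did not change.
 */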
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) static bool force_rebuild;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) void cpuset_force_rebuild(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) 	force_rebuild = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079)  * cpuset_hotplug_update_tasks - update tasks in a cpuset for hotunplug
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080)  * @cs: cpuset in interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081)  * @tmp: the tmpmasks structure pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083)  * Compare @cs's cpu and mem masks against top_cpuset and if some have gone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084)  * offline, update @cs accordingly.  If @cs ends up with no CPU or memory,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085)  * all its tasks are moved to the nearest ancestor with both resources.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) 	static cpumask_t new_cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) 	static nodemask_t new_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) 	bool cpus_updated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) 	bool mems_updated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) 	struct cpuset *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) 	wait_event(cpuset_attach_wq, cs->attach_in_progress == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) 	 * We have raced with task attaching. We wait until attaching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) 	 * is finished, so we won't attach a task to an empty cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) 	if (cs->attach_in_progress) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) 		mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) 		goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) 	parent = parent_cs(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) 	compute_effective_cpumask(&new_cpus, cs, parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) 	nodes_and(new_mems, cs->mems_allowed, parent->effective_mems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) 	if (cs->nr_subparts_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) 		 * Make sure that CPUs allocated to child partitions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) 		 * do not show up in effective_cpus.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) 		cpumask_andnot(&new_cpus, &new_cpus, cs->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) 	if (!tmp || !cs->partition_root_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) 		goto update_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) 	 * In the unlikely event that a partition root has empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) 	 * effective_cpus or its parent becomes erroneous, we have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) 	 * transition it to the erroneous state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) 	if (is_partition_root(cs) && (cpumask_empty(&new_cpus) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) 	   (parent->partition_root_state == PRS_ERROR))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) 		if (cs->nr_subparts_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) 			spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) 			cs->nr_subparts_cpus = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) 			cpumask_clear(cs->subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) 			spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) 			compute_effective_cpumask(&new_cpus, cs, parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) 		 * If the effective_cpus is empty because the child
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) 		 * partitions take away all the CPUs, we can keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) 		 * the current partition and let the child partitions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) 		 * fight for available CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) 		if ((parent->partition_root_state == PRS_ERROR) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) 		     cpumask_empty(&new_cpus)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) 			update_parent_subparts_cpumask(cs, partcmd_disable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) 						       NULL, tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) 			spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) 			cs->partition_root_state = PRS_ERROR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) 			spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) 		cpuset_force_rebuild();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) 	 * On the other hand, an erroneous partition root may be transitioned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) 	 * back to a regular one, or a partition root with no CPU allocated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) 	 * from the parent may change to erroneous.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) 	if (is_partition_root(parent) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) 	   ((cs->partition_root_state == PRS_ERROR) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) 	    !cpumask_intersects(&new_cpus, parent->subparts_cpus)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) 	     update_parent_subparts_cpumask(cs, partcmd_update, NULL, tmp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) 		cpuset_force_rebuild();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) update_tasks:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) 	cpus_updated = !cpumask_equal(&new_cpus, cs->effective_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) 	mems_updated = !nodes_equal(new_mems, cs->effective_mems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) 	if (is_in_v2_mode())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) 		hotplug_update_tasks(cs, &new_cpus, &new_mems,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) 				     cpus_updated, mems_updated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) 		hotplug_update_tasks_legacy(cs, &new_cpus, &new_mems,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) 					    cpus_updated, mems_updated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180)  * cpuset_hotplug_workfn - handle CPU/memory hotunplug for a cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182)  * This function is called after either CPU or memory configuration has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183)  * changed and updates cpuset accordingly.  The top_cpuset is always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184)  * synchronized to cpu_active_mask and N_MEMORY, which is necessary in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185)  * order to make cpusets transparent (of no effect) on systems that are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186)  * actively using CPU hotplug but making no active use of cpusets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188)  * Non-root cpusets are only affected by offlining.  If any CPUs or memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189)  * nodes have been taken down, cpuset_hotplug_update_tasks() is invoked on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190)  * all descendants.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192)  * Note that CPU offlining during suspend is ignored.  We don't modify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193)  * cpusets across suspend/resume cycles at all.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) void cpuset_hotplug_workfn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) 	static cpumask_t new_cpus;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) 	static nodemask_t new_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) 	bool cpus_updated, mems_updated;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) 	bool on_dfl = is_in_v2_mode();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) 	struct tmpmasks tmp, *ptmp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) 	if (on_dfl && !alloc_cpumasks(NULL, &tmp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) 		ptmp = &tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) 	mutex_lock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) 	/* fetch the available cpus/mems and find out which changed how */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) 	cpumask_copy(&new_cpus, cpu_active_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) 	new_mems = node_states[N_MEMORY];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) 	 * If subparts_cpus is populated, it is likely that the check below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) 	 * will produce a false positive on cpus_updated when the cpu list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) 	 * isn't changed. It is extra work, but it is better to be safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) 	cpus_updated = !cpumask_equal(top_cpuset.effective_cpus, &new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) 	mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) 	 * In the rare case that hotplug removes all the cpus in subparts_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) 	 * we assume that cpus are updated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) 	if (!cpus_updated && top_cpuset.nr_subparts_cpus)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) 		cpus_updated = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) 	/* synchronize cpus_allowed to cpu_active_mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) 	if (cpus_updated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) 		spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) 		if (!on_dfl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) 			cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) 		 * Make sure that CPUs allocated to child partitions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) 		 * do not show up in effective_cpus. If no CPU is left,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) 		 * we clear the subparts_cpus & let the child partitions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) 		 * fight for the CPUs again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) 		if (top_cpuset.nr_subparts_cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) 			if (cpumask_subset(&new_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) 					   top_cpuset.subparts_cpus)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) 				top_cpuset.nr_subparts_cpus = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) 				cpumask_clear(top_cpuset.subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) 				cpumask_andnot(&new_cpus, &new_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) 					       top_cpuset.subparts_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) 		cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) 		spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) 		/* we don't mess with cpumasks of tasks in top_cpuset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) 	/* synchronize mems_allowed to N_MEMORY */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) 	if (mems_updated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) 		spin_lock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) 		if (!on_dfl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) 			top_cpuset.mems_allowed = new_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) 		top_cpuset.effective_mems = new_mems;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) 		spin_unlock_irq(&callback_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) 		update_tasks_nodemask(&top_cpuset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) 	mutex_unlock(&cpuset_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) 	/* if cpus or mems changed, we need to propagate to descendants */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) 	if (cpus_updated || mems_updated) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) 		struct cpuset *cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) 		struct cgroup_subsys_state *pos_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) 		cpuset_for_each_descendant_pre(cs, pos_css, &top_cpuset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) 			if (cs == &top_cpuset || !css_tryget_online(&cs->css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) 			rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) 			cpuset_hotplug_update_tasks(cs, ptmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) 			rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) 			css_put(&cs->css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) 	/* rebuild sched domains if cpus_allowed has changed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) 	if (cpus_updated || force_rebuild) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) 		force_rebuild = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) 		rebuild_sched_domains();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) 	free_cpumasks(NULL, ptmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) void cpuset_update_active_cpus(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) 	 * We're inside the cpu hotplug critical region which usually nests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) 	 * inside cgroup synchronization.  Bounce actual hotplug processing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) 	 * to a work item to avoid reverse locking order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) 	schedule_work(&cpuset_hotplug_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) 
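/*
 * Like cpuset_update_active_cpus(), but queue the hotplug work on a specific
 * CPU so the propagation runs there.
 */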
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) void cpuset_update_active_cpus_affine(int cpu)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) 	schedule_work_on(cpu, &cpuset_hotplug_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) 
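/* Wait for any queued hotplug propagation work to finish. */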
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) void cpuset_wait_for_hotplug(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) 	flush_work(&cpuset_hotplug_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314)  * Keep top_cpuset.mems_allowed tracking node_states[N_MEMORY].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315)  * Call this routine anytime after node_states[N_MEMORY] changes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316)  * See cpuset_update_active_cpus() for CPU hotplug handling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) static int cpuset_track_online_nodes(struct notifier_block *self,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) 				unsigned long action, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) 	schedule_work(&cpuset_hotplug_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) 	return NOTIFY_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) static struct notifier_block cpuset_track_online_nodes_nb = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) 	.notifier_call = cpuset_track_online_nodes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) 	.priority = 10,		/* ??! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331)  * cpuset_init_smp - initialize cpus_allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333)  * Description: Finish top cpuset after the cpu and node maps are initialized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) void __init cpuset_init_smp(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) 	cpumask_copy(top_cpuset.cpus_allowed, cpu_active_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) 	top_cpuset.mems_allowed = node_states[N_MEMORY];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) 	top_cpuset.old_mems_allowed = top_cpuset.mems_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) 	cpumask_copy(top_cpuset.effective_cpus, cpu_active_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) 	top_cpuset.effective_mems = node_states[N_MEMORY];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) 	register_hotmemory_notifier(&cpuset_track_online_nodes_nb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) 	cpuset_migrate_mm_wq = alloc_ordered_workqueue("cpuset_migrate_mm", 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) 	BUG_ON(!cpuset_migrate_mm_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351)  * cpuset_cpus_allowed - return cpus_allowed mask from a task's cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352)  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353)  * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355)  * Description: Returns the cpumask_var_t cpus_allowed of the cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356)  * attached to the specified @tsk.  Guaranteed to return some non-empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357)  * subset of cpu_online_mask, even if this means going outside the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358)  * task's cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359)  **/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) 	spin_lock_irqsave(&callback_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) 	guarantee_online_cpus(tsk, pmask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) 	spin_unlock_irqrestore(&callback_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) EXPORT_SYMBOL_GPL(cpuset_cpus_allowed);
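
/*
 * Illustrative sketch only (not part of this file's API): a caller that
 * wants to confine the current task to the CPUs of its cpuset could use
 * the helper above roughly like this:
 *
 *	cpumask_var_t mask;
 *
 *	if (alloc_cpumask_var(&mask, GFP_KERNEL)) {
 *		cpuset_cpus_allowed(current, mask);
 *		set_cpus_allowed_ptr(current, mask);
 *		free_cpumask_var(mask);
 *	}
 */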
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373)  * cpuset_cpus_allowed_fallback - final fallback before complete catastrophe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374)  * @tsk: pointer to task_struct with which the scheduler is struggling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376)  * Description: In the case that the scheduler cannot find an allowed cpu in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377)  * tsk->cpus_allowed, we fall back to task_cs(tsk)->cpus_allowed. In legacy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378)  * mode however, this value is the same as task_cs(tsk)->effective_cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379)  * which will not contain a sane cpumask during cases such as cpu hotplugging.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380)  * This is the absolute last resort for the scheduler and it is only used if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381)  * _every_ other avenue has been traveled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382)  **/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) 	const struct cpumask *possible_mask = task_cpu_possible_mask(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) 	const struct cpumask *cs_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) 	cs_mask = task_cs(tsk)->cpus_allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) 	if (!is_in_v2_mode() || !cpumask_subset(cs_mask, possible_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) 		goto unlock; /* select_fallback_rq will try harder */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) 	do_set_cpus_allowed(tsk, cs_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) 	 * We own tsk->cpus_allowed, nobody can change it under us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) 	 * But we used cs && cs->cpus_allowed lockless and thus can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) 	 * race with cgroup_attach_task() or update_cpumask() and get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) 	 * the wrong tsk->cpus_allowed. However, both cases imply the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) 	 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) 	 * which takes task_rq_lock().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) 	 * If we are called after it dropped the lock we must see all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) 	 * changes in task_cs()->cpus_allowed. Otherwise we can temporarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) 	 * set any mask even if it is not right from task_cs() pov,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) 	 * the pending set_cpus_allowed_ptr() will fix things.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) 	 * select_fallback_rq() will fix things up and set cpu_possible_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) 	 * if required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) void __init cpuset_init_current_mems_allowed(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) 	nodes_setall(current->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424)  * cpuset_mems_allowed - return mems_allowed mask from a task's cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425)  * @tsk: pointer to task_struct from which to obtain cpuset->mems_allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427)  * Description: Returns the nodemask_t mems_allowed of the cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428)  * attached to the specified @tsk.  Guaranteed to return some non-empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429)  * subset of node_states[N_MEMORY], even if this means going outside the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430)  * task's cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431)  **/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) 	nodemask_t mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) 	spin_lock_irqsave(&callback_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) 	guarantee_online_mems(task_cs(tsk), &mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) 	spin_unlock_irqrestore(&callback_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) 	return mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448)  * cpuset_nodemask_valid_mems_allowed - check nodemask vs. current mems_allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449)  * @nodemask: the nodemask to be checked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451)  * Are any of the nodes in the nodemask allowed in current->mems_allowed?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) int cpuset_nodemask_valid_mems_allowed(nodemask_t *nodemask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) 	return nodes_intersects(*nodemask, current->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459)  * nearest_hardwall_ancestor() - Returns the nearest mem_exclusive or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460)  * mem_hardwall ancestor to the specified cpuset.  Call holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461)  * callback_lock.  If no ancestor is mem_exclusive or mem_hardwall
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462)  * (an unusual configuration), then returns the root cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) static struct cpuset *nearest_hardwall_ancestor(struct cpuset *cs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) 	while (!(is_mem_exclusive(cs) || is_mem_hardwall(cs)) && parent_cs(cs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) 		cs = parent_cs(cs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) 	return cs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472)  * cpuset_node_allowed - Can we allocate on a memory node?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473)  * @node: is this an allowed node?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474)  * @gfp_mask: memory allocation flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476)  * If we're in interrupt, yes, we can always allocate.  If @node is set in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477)  * current's mems_allowed, yes.  If it's not a __GFP_HARDWALL request and this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478)  * and do not allow allocations outside the current task's cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479)  * yes.  If current has access to memory reserves as an oom victim, yes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480)  * Otherwise, no.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482)  * GFP_USER allocations are marked with the __GFP_HARDWALL bit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483)  * and do not allow allocations outside the current tasks cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484)  * unless the task has been OOM killed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485)  * GFP_KERNEL allocations are not so marked, so can escape to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486)  * nearest enclosing hardwalled ancestor cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488)  * Scanning up parent cpusets requires callback_lock.  The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489)  * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490)  * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491)  * current task's mems_allowed came up empty on the first pass over
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492)  * the zonelist.  So only GFP_KERNEL allocations, if all nodes in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493)  * cpuset are short of memory, might require taking the callback_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495)  * The first call here from mm/page_alloc:get_page_from_freelist()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496)  * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497)  * so no allocation on a node outside the cpuset is allowed (unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498)  * in interrupt, of course).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500)  * The second pass through get_page_from_freelist() doesn't even call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501)  * here for GFP_ATOMIC calls.  For those calls, the __alloc_pages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502)  * variable 'wait' is not set, and the bit ALLOC_CPUSET is not set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503)  * in alloc_flags.  That logic and the checks below have the combined
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504)  * effect that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505)  *	in_interrupt - any node ok (current task context irrelevant)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506)  *	GFP_ATOMIC   - any node ok
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507)  *	tsk_is_oom_victim   - any node ok
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508)  *	GFP_KERNEL   - any node in enclosing hardwalled cpuset ok
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509)  *	GFP_USER     - only nodes in current task's mems_allowed ok.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) 	struct cpuset *cs;		/* current cpuset ancestors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) 	int allowed;			/* is allocation on this node allowed? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) 	if (in_interrupt())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) 	if (node_isset(node, current->mems_allowed))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) 	 * Allow tasks that have access to memory reserves because they have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) 	 * been OOM killed to get memory anywhere.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) 	if (unlikely(tsk_is_oom_victim(current)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) 	if (gfp_mask & __GFP_HARDWALL)	/* If hardwall request, stop here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) 	if (current->flags & PF_EXITING) /* Let dying task have memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) 	/* Not hardwall and node outside mems_allowed: scan up cpusets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) 	spin_lock_irqsave(&callback_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) 	cs = nearest_hardwall_ancestor(task_cs(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) 	allowed = node_isset(node, cs->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) 	spin_unlock_irqrestore(&callback_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) 	return allowed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546)  * cpuset_mem_spread_node() - On which node to begin search for a file page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547)  * cpuset_slab_spread_node() - On which node to begin search for a slab page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549)  * If a task is marked PF_SPREAD_PAGE or PF_SPREAD_SLAB (as for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550)  * tasks in a cpuset with is_spread_page or is_spread_slab set),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551)  * and if the memory allocation used cpuset_mem_spread_node()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552)  * to determine on which node to start looking, as it will for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553)  * certain page cache or slab cache pages such as those used for file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554)  * system buffers and inode caches, then instead of starting on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555)  * local node to look for a free page, the starting node is spread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556)  * around the task's mems_allowed nodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558)  * We don't have to worry about the returned node being offline
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559)  * because "it can't happen", and even if it did, it would be ok.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561)  * The routines calling guarantee_online_mems() are careful to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562)  * only set nodes in task->mems_allowed that are online.  So it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563)  * should not be possible for the following code to return an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564)  * offline node.  But if it did, that would be ok, as this routine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565)  * is not returning the node where the allocation must be, only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566)  * the node where the search should start.  The zonelist passed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567)  * __alloc_pages() will include all nodes.  If the slab allocator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568)  * is passed an offline node, it will fall back to the local node.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569)  * See kmem_cache_alloc_node().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) 
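/* Advance @rotor to the next node in current->mems_allowed and return it. */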
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) static int cpuset_spread_node(int *rotor)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) 	return *rotor = next_node_in(*rotor, current->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) int cpuset_mem_spread_node(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) 	if (current->cpuset_mem_spread_rotor == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) 		current->cpuset_mem_spread_rotor =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) 			node_random(&current->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) 	return cpuset_spread_node(&current->cpuset_mem_spread_rotor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) int cpuset_slab_spread_node(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) 	if (current->cpuset_slab_spread_rotor == NUMA_NO_NODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) 		current->cpuset_slab_spread_rotor =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) 			node_random(&current->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) 	return cpuset_spread_node(&current->cpuset_slab_spread_rotor);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) EXPORT_SYMBOL_GPL(cpuset_mem_spread_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598)  * cpuset_mems_allowed_intersects - Does @tsk1's mems_allowed intersect @tsk2's?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599)  * @tsk1: pointer to task_struct of some task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600)  * @tsk2: pointer to task_struct of some other task.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602)  * Description: Return true if @tsk1's mems_allowed intersects the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603)  * mems_allowed of @tsk2.  Used by the OOM killer to determine if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604)  * one task's memory usage might impact the memory available
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605)  * to the other.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606)  **/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) int cpuset_mems_allowed_intersects(const struct task_struct *tsk1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) 				   const struct task_struct *tsk2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) 	return nodes_intersects(tsk1->mems_allowed, tsk2->mems_allowed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615)  * cpuset_print_current_mems_allowed - prints current's cpuset and mems_allowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617)  * Description: Prints current's name, cpuset name, and cached copy of its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618)  * mems_allowed to the kernel log.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) void cpuset_print_current_mems_allowed(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) 	struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) 	cgrp = task_cs(current)->css.cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) 	pr_cont(",cpuset=");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) 	pr_cont_cgroup_name(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) 	pr_cont(",mems_allowed=%*pbl",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) 		nodemask_pr_args(&current->mems_allowed));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636)  * Collection of memory_pressure is suppressed unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637)  * this flag is enabled by writing "1" to the special
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638)  * cpuset file 'memory_pressure_enabled' in the root cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) int cpuset_memory_pressure_enabled __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644)  * cpuset_memory_pressure_bump - keep stats of per-cpuset reclaims.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646)  * Keep a running average of the rate of synchronous (direct)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647)  * page reclaim efforts initiated by tasks in each cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649)  * This represents the rate at which some task in the cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650)  * ran low on memory on all nodes it was allowed to use, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651)  * had to enter the kernel's page reclaim code in an effort to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652)  * create more free memory by tossing clean pages or swapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653)  * or writing dirty pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655)  * Display to user space in the per-cpuset read-only file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656)  * "memory_pressure".  Value displayed is an integer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657)  * representing the recent rate of entry into the synchronous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658)  * (direct) page reclaim by any task attached to the cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659)  **/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) void __cpuset_memory_pressure_bump(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) 	fmeter_markevent(&task_cs(current)->fmeter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) #ifdef CONFIG_PROC_PID_CPUSET
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670)  * proc_cpuset_show()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671)  *  - Print task's cpuset path into seq_file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672)  *  - Used for /proc/<pid>/cpuset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673)  *  - No need to task_lock(tsk) on this tsk->cpuset reference, as it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674)  *    doesn't really matter if tsk->cpuset changes after we read it,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675)  *    and we take cpuset_mutex, keeping cpuset_attach() from changing it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676)  *    anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) int proc_cpuset_show(struct seq_file *m, struct pid_namespace *ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) 		     struct pid *pid, struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) 	char *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) 	struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) 	int retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) 	retval = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) 	buf = kmalloc(PATH_MAX, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) 	if (!buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) 	css = task_get_css(tsk, cpuset_cgrp_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) 	retval = cgroup_path_ns(css->cgroup, buf, PATH_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) 				current->nsproxy->cgroup_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) 	css_put(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) 	if (retval >= PATH_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) 		retval = -ENAMETOOLONG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) 	if (retval < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) 	seq_puts(m, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) 	seq_putc(m, '\n');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) 	retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702) 	kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) 	return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) #endif /* CONFIG_PROC_PID_CPUSET */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) /* Display task mems_allowed in /proc/<pid>/status file. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) void cpuset_task_status_allowed(struct seq_file *m, struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) 	seq_printf(m, "Mems_allowed:\t%*pb\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) 		   nodemask_pr_args(&task->mems_allowed));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) 	seq_printf(m, "Mems_allowed_list:\t%*pbl\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) 		   nodemask_pr_args(&task->mems_allowed));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) }