^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Generic process-grouping system.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Based originally on the cpuset system, extracted by Paul Menage
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 2006 Google, Inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Notifications support
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Copyright (C) 2009 Nokia Corporation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Author: Kirill A. Shutemov
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * Copyright notices from the original cpuset code:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * --------------------------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * Copyright (C) 2003 BULL SA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * Copyright (C) 2004-2006 Silicon Graphics, Inc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * Portions derived from Patrick Mochel's sysfs code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * sysfs is Copyright (c) 2001-3 Patrick Mochel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * 2003-10-10 Written by Simon Derr.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * 2003-10-22 Updates by Stephen Hemminger.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * 2004 May-July Rework by Paul Jackson.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * ---------------------------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * This file is subject to the terms and conditions of the GNU General Public
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * License. See the file COPYING in the main directory of the Linux
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * distribution for more details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #include "cgroup-internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #include <linux/cred.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #include <linux/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #include <linux/init_task.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #include <linux/magic.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #include <linux/mutex.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #include <linux/mount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #include <linux/proc_fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #include <linux/rcupdate.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #include <linux/sched/task.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #include <linux/spinlock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #include <linux/percpu-rwsem.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) #include <linux/hashtable.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #include <linux/idr.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #include <linux/kthread.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #include <linux/atomic.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #include <linux/cpuset.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #include <linux/proc_ns.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #include <linux/nsproxy.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #include <linux/fs_parser.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #include <linux/sched/cputime.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #include <linux/psi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #include <net/sock.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) #define CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) #include <trace/events/cgroup.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) #undef CREATE_TRACE_POINTS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) #include <trace/hooks/cgroup.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) #define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) MAX_CFTYPE_NAME + 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) /* let's not notify more than 100 times per second */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) #define CGROUP_FILE_NOTIFY_MIN_INTV DIV_ROUND_UP(HZ, 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * cgroup_mutex is the master lock. Any modification to cgroup or its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) * hierarchy must be performed while holding it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * css_set_lock protects task->cgroups pointer, the list of css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * objects, and the chain of tasks off each css_set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * These locks are exported if CONFIG_PROVE_RCU so that accessors in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) * cgroup.h can use them for lockdep annotations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) DEFINE_MUTEX(cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) DEFINE_SPINLOCK(css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) #ifdef CONFIG_PROVE_RCU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) EXPORT_SYMBOL_GPL(cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) EXPORT_SYMBOL_GPL(css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) DEFINE_SPINLOCK(trace_cgroup_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) char trace_cgroup_path[TRACE_CGROUP_PATH_LEN];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) bool cgroup_debug __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) * Protects cgroup_idr and css_idr so that IDs can be released without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * grabbing cgroup_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) static DEFINE_SPINLOCK(cgroup_idr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) * Protects cgroup_file->kn for !self csses. It synchronizes notifications
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) * against file removal/re-creation across css hiding.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) static DEFINE_SPINLOCK(cgroup_file_kn_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) DEFINE_PERCPU_RWSEM(cgroup_threadgroup_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) #define cgroup_assert_mutex_or_rcu_locked() \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) RCU_LOCKDEP_WARN(!rcu_read_lock_held() && \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) !lockdep_is_held(&cgroup_mutex), \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) "cgroup_mutex or RCU read lock required");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * cgroup destruction makes heavy use of work items and there can be a lot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * of concurrent destructions. Use a separate workqueue so that cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * destruction work items don't end up filling up max_active of system_wq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * which may lead to deadlock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) static struct workqueue_struct *cgroup_destroy_wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) /* generate an array of cgroup subsystem pointers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) struct cgroup_subsys *cgroup_subsys[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) #include <linux/cgroup_subsys.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) #undef SUBSYS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) /* array of cgroup subsystem names */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) #define SUBSYS(_x) [_x ## _cgrp_id] = #_x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) static const char *cgroup_subsys_name[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) #include <linux/cgroup_subsys.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) #undef SUBSYS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) /* array of static_keys for cgroup_subsys_enabled() and cgroup_subsys_on_dfl() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) #define SUBSYS(_x) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_enabled_key); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) DEFINE_STATIC_KEY_TRUE(_x ## _cgrp_subsys_on_dfl_key); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_enabled_key); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) EXPORT_SYMBOL_GPL(_x ## _cgrp_subsys_on_dfl_key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) #include <linux/cgroup_subsys.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) #undef SUBSYS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_enabled_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) static struct static_key_true *cgroup_subsys_enabled_key[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) #include <linux/cgroup_subsys.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) #undef SUBSYS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) #define SUBSYS(_x) [_x ## _cgrp_id] = &_x ## _cgrp_subsys_on_dfl_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) static struct static_key_true *cgroup_subsys_on_dfl_key[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) #include <linux/cgroup_subsys.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) #undef SUBSYS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) static DEFINE_PER_CPU(struct cgroup_rstat_cpu, cgrp_dfl_root_rstat_cpu);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) /* the default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) struct cgroup_root cgrp_dfl_root = { .cgrp.rstat_cpu = &cgrp_dfl_root_rstat_cpu };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) EXPORT_SYMBOL_GPL(cgrp_dfl_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * The default hierarchy always exists but is hidden until mounted for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) * first time. This is for backward compatibility.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) static bool cgrp_dfl_visible;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) /* some controllers are not supported in the default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) static u16 cgrp_dfl_inhibit_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) /* some controllers are implicitly enabled on the default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) static u16 cgrp_dfl_implicit_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) /* some controllers can be threaded on the default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) static u16 cgrp_dfl_threaded_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) /* The list of hierarchy roots */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) LIST_HEAD(cgroup_roots);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) static int cgroup_root_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) /* hierarchy ID allocation and mapping, protected by cgroup_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) static DEFINE_IDR(cgroup_hierarchy_idr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) * Assign a monotonically increasing serial number to csses. It guarantees
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * cgroups with bigger numbers are newer than those with smaller numbers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) * Also, as csses are always appended to the parent's ->children list, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) * guarantees that sibling csses are always sorted in the ascending serial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) * number order on the list. Protected by cgroup_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) static u64 css_serial_nr_next = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) * These bitmasks identify subsystems with specific features to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) * having to do iterative checks repeatedly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) static u16 have_fork_callback __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) static u16 have_exit_callback __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) static u16 have_release_callback __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) static u16 have_canfork_callback __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) /* cgroup namespace for init task */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) struct cgroup_namespace init_cgroup_ns = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) .count = REFCOUNT_INIT(2),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) .user_ns = &init_user_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) .ns.ops = &cgroupns_operations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) .ns.inum = PROC_CGROUP_INIT_INO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) .root_cset = &init_css_set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) static struct file_system_type cgroup2_fs_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) static struct cftype cgroup_base_files[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) /* cgroup optional features */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) enum cgroup_opt_features {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) #ifdef CONFIG_PSI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) OPT_FEATURE_PRESSURE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) OPT_FEATURE_COUNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) static const char *cgroup_opt_feature_names[OPT_FEATURE_COUNT] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) #ifdef CONFIG_PSI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) "pressure",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) static u16 cgroup_feature_disable_mask __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) static int cgroup_apply_control(struct cgroup *cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) static void cgroup_finalize_control(struct cgroup *cgrp, int ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) static void css_task_iter_skip(struct css_task_iter *it,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) struct task_struct *task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) static int cgroup_destroy_locked(struct cgroup *cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) struct cgroup_subsys *ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) static void css_release(struct percpu_ref *ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) static void kill_css(struct cgroup_subsys_state *css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) static int cgroup_addrm_files(struct cgroup_subsys_state *css,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) struct cgroup *cgrp, struct cftype cfts[],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) bool is_add);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * cgroup_ssid_enabled - cgroup subsys enabled test by subsys ID
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * @ssid: subsys ID of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * cgroup_subsys_enabled() can only be used with literal subsys names which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * is fine for individual subsystems but unsuitable for cgroup core. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) * is slower static_key_enabled() based test indexed by @ssid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) bool cgroup_ssid_enabled(int ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) if (CGROUP_SUBSYS_COUNT == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) return static_key_enabled(cgroup_subsys_enabled_key[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
/**
 * cgroup_on_dfl - test whether a cgroup is on the default hierarchy
 * @cgrp: the cgroup of interest
 *
 * The default hierarchy is the v2 interface of cgroup and this function
 * can be used to test whether a cgroup is on the default hierarchy for
 * cases where a subsystem should behave differently depending on the
 * interface version.
 *
 * List of changed behaviors:
 *
 * - Mount options "noprefix", "xattr", "clone_children", "release_agent"
 *   and "name" are disallowed.
 *
 * - When mounting an existing superblock, mount options should match.
 *
 * - Remount is disallowed.
 *
 * - rename(2) is disallowed.
 *
 * - "tasks" is removed.  Everything should be at process granularity.  Use
 *   "cgroup.procs" instead.
 *
 * - "cgroup.procs" is not sorted.  pids will be unique unless they got
 *   recycled in between reads.
 *
 * - "release_agent" and "notify_on_release" are removed.  Replacement
 *   notification mechanism will be implemented.
 *
 * - "cgroup.clone_children" is removed.
 *
 * - "cgroup.subtree_populated" is available.  Its value is 0 if the cgroup
 *   and its descendants contain no task; otherwise, 1.  The file also
 *   generates kernfs notification which can be monitored through poll and
 *   [di]notify when the value of the file changes.
 *
 * - cpuset: tasks will be kept in empty cpusets when hotplug happens and
 *   take masks of ancestors with non-empty cpus/mems, instead of being
 *   moved to an ancestor.
 *
 * - cpuset: a task can be moved into an empty cpuset, and again it takes
 *   masks of ancestors.
 *
 * - memcg: use_hierarchy is on by default and the cgroup file for the flag
 *   is not created.
 *
 * - blkcg: blk-throttle becomes properly hierarchical.
 *
 * - debug: disallowed on the default hierarchy.
 */
bool cgroup_on_dfl(const struct cgroup *cgrp)
{
	/* the default (v2) hierarchy is the statically allocated cgrp_dfl_root */
	return cgrp->root == &cgrp_dfl_root;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) /* IDR wrappers which synchronize using cgroup_idr_lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) static int cgroup_idr_alloc(struct idr *idr, void *ptr, int start, int end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) gfp_t gfp_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) idr_preload(gfp_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) spin_lock_bh(&cgroup_idr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) ret = idr_alloc(idr, ptr, start, end, gfp_mask & ~__GFP_DIRECT_RECLAIM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) spin_unlock_bh(&cgroup_idr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) idr_preload_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) static void *cgroup_idr_replace(struct idr *idr, void *ptr, int id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) void *ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) spin_lock_bh(&cgroup_idr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) ret = idr_replace(idr, ptr, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) spin_unlock_bh(&cgroup_idr_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338)
/* remove the entry at @id; bh-disabled to match the other cgroup_idr_lock users */
static void cgroup_idr_remove(struct idr *idr, int id)
{
	spin_lock_bh(&cgroup_idr_lock);
	idr_remove(idr, id);
	spin_unlock_bh(&cgroup_idr_lock);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) static bool cgroup_has_tasks(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) return cgrp->nr_populated_csets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
/* a cgroup is threaded iff its resource domain is some other cgroup */
bool cgroup_is_threaded(struct cgroup *cgrp)
{
	return cgrp->dom_cgrp != cgrp;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) /* can @cgrp host both domain and threaded children? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) static bool cgroup_is_mixable(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) * Root isn't under domain level resource control exempting it from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) * the no-internal-process constraint, so it can serve as a thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) * root and a parent of resource domains at the same time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) return !cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) /* can @cgrp become a thread root? should always be true for a thread root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) /* mixables don't care */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) if (cgroup_is_mixable(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) /* domain roots can't be nested under threaded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) /* can only have either domain or threaded children */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) if (cgrp->nr_populated_domain_children)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) /* and no domain controllers can be enabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) if (cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) /* is @cgrp root of a threaded subtree? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) bool cgroup_is_thread_root(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) /* thread root should be a domain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) /* a domain w/ threaded children is a thread root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) if (cgrp->nr_threaded_children)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) * A domain which has tasks and explicit threaded controllers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) * enabled is a thread root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) if (cgroup_has_tasks(cgrp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) (cgrp->subtree_control & cgrp_dfl_threaded_ss_mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) /* a domain which isn't connected to the root w/o brekage can't be used */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) static bool cgroup_is_valid_domain(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) /* the cgroup itself can be a thread root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) /* but the ancestors can't be unless mixable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) while ((cgrp = cgroup_parent(cgrp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) if (!cgroup_is_mixable(cgrp) && cgroup_is_thread_root(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) /* subsystems visibly enabled on a cgroup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) static u16 cgroup_control(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) struct cgroup *parent = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) u16 root_ss_mask = cgrp->root->subsys_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) if (parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) u16 ss_mask = parent->subtree_control;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) /* threaded cgroups can only have threaded controllers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) ss_mask &= cgrp_dfl_threaded_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) return ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) if (cgroup_on_dfl(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) root_ss_mask &= ~(cgrp_dfl_inhibit_ss_mask |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) cgrp_dfl_implicit_ss_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) return root_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) /* subsystems enabled on a cgroup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) static u16 cgroup_ss_mask(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) struct cgroup *parent = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) if (parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) u16 ss_mask = parent->subtree_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) /* threaded cgroups can only have threaded controllers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) ss_mask &= cgrp_dfl_threaded_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) return ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) return cgrp->root->subsys_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) * cgroup_css - obtain a cgroup's css for the specified subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) * @cgrp: the cgroup of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) * @ss: the subsystem of interest (%NULL returns @cgrp->self)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) * Return @cgrp's css (cgroup_subsys_state) associated with @ss. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) * function must be called either under cgroup_mutex or rcu_read_lock() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) * the caller is responsible for pinning the returned css if it wants to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) * keep accessing it outside the said locks. This function may return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) * %NULL if @cgrp doesn't have @subsys_id enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) static struct cgroup_subsys_state *cgroup_css(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) struct cgroup_subsys *ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) if (ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) return rcu_dereference_check(cgrp->subsys[ss->id],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) lockdep_is_held(&cgroup_mutex));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) return &cgrp->self;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) * cgroup_tryget_css - try to get a cgroup's css for the specified subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) * @cgrp: the cgroup of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) * @ss: the subsystem of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) * Find and get @cgrp's css assocaited with @ss. If the css doesn't exist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) * or is offline, %NULL is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) static struct cgroup_subsys_state *cgroup_tryget_css(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) struct cgroup_subsys *ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) css = cgroup_css(cgrp, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) if (css && !css_tryget_online(css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) css = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) return css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * cgroup_e_css_by_mask - obtain a cgroup's effective css for the specified ss
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * @cgrp: the cgroup of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) * @ss: the subsystem of interest (%NULL returns @cgrp->self)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) * Similar to cgroup_css() but returns the effective css, which is defined
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) * as the matching css of the nearest ancestor including self which has @ss
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) * enabled. If @ss is associated with the hierarchy @cgrp is on, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) * function is guaranteed to return non-NULL css.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) struct cgroup_subsys *ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) if (!ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) return &cgrp->self;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) * This function is used while updating css associations and thus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) * can't test the csses directly. Test ss_mask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) while (!(cgroup_ss_mask(cgrp) & (1 << ss->id))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) cgrp = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) return cgroup_css(cgrp, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) * cgroup_e_css - obtain a cgroup's effective css for the specified subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) * @cgrp: the cgroup of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) * @ss: the subsystem of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) * Find and get the effective css of @cgrp for @ss. The effective css is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) * defined as the matching css of the nearest ancestor including self which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) * the root css is returned, so this function always returns a valid css.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) * The returned css is not guaranteed to be online, and therefore it is the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) * callers responsiblity to tryget a reference for it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) struct cgroup_subsys *ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) css = cgroup_css(cgrp, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) if (css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) return css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) cgrp = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) } while (cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) return init_css_set.subsys[ss->id];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) * cgroup_get_e_css - get a cgroup's effective css for the specified subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) * @cgrp: the cgroup of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) * @ss: the subsystem of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) * Find and get the effective css of @cgrp for @ss. The effective css is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) * defined as the matching css of the nearest ancestor including self which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) * has @ss enabled. If @ss is not mounted on the hierarchy @cgrp is on,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) * the root css is returned, so this function always returns a valid css.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) * The returned css must be put using css_put().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) struct cgroup_subsys *ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) css = cgroup_css(cgrp, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) if (css && css_tryget_online(css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) cgrp = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) } while (cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) css = init_css_set.subsys[ss->id];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) css_get(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) return css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) static void cgroup_get_live(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) WARN_ON_ONCE(cgroup_is_dead(cgrp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) css_get(&cgrp->self);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) * __cgroup_task_count - count the number of tasks in a cgroup. The caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) * is responsible for taking the css_set_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) * @cgrp: the cgroup in question
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) int __cgroup_task_count(const struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) int count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) list_for_each_entry(link, &cgrp->cset_links, cset_link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) count += link->cset->nr_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) * cgroup_task_count - count the number of tasks in a cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) * @cgrp: the cgroup in question
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) int cgroup_task_count(const struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) int count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) count = __cgroup_task_count(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) return count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) struct cgroup_subsys_state *of_css(struct kernfs_open_file *of)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) struct cgroup *cgrp = of->kn->parent->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) struct cftype *cft = of_cft(of);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) * This is open and unprotected implementation of cgroup_css().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) * seq_css() is only called from a kernfs file operation which has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) * an active reference on the file. Because all the subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) * files are drained before a css is disassociated with a cgroup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) * the matching css from the cgroup's subsys table is guaranteed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) * be and stay valid until the enclosing operation is complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) if (cft->ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) return rcu_dereference_raw(cgrp->subsys[cft->ss->id]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) return &cgrp->self;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) EXPORT_SYMBOL_GPL(of_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) * for_each_css - iterate all css's of a cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) * @css: the iteration cursor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) * @cgrp: the target cgroup to iterate css's of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) * Should be called under cgroup_[tree_]mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) #define for_each_css(css, ssid, cgrp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) if (!((css) = rcu_dereference_check( \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) (cgrp)->subsys[(ssid)], \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) lockdep_is_held(&cgroup_mutex)))) { } \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) * for_each_e_css - iterate all effective css's of a cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) * @css: the iteration cursor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) * @ssid: the index of the subsystem, CGROUP_SUBSYS_COUNT after reaching the end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) * @cgrp: the target cgroup to iterate css's of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) * Should be called under cgroup_[tree_]mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) #define for_each_e_css(css, ssid, cgrp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) for ((ssid) = 0; (ssid) < CGROUP_SUBSYS_COUNT; (ssid)++) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) if (!((css) = cgroup_e_css_by_mask(cgrp, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) cgroup_subsys[(ssid)]))) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) ; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) * do_each_subsys_mask - filter for_each_subsys with a bitmask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) * @ss: the iteration cursor
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * @ssid: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * @ss_mask: the bitmask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) * The block will only run for cases where the ssid-th bit (1 << ssid) of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) * @ss_mask is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) #define do_each_subsys_mask(ss, ssid, ss_mask) do { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) unsigned long __ss_mask = (ss_mask); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) if (!CGROUP_SUBSYS_COUNT) { /* to avoid spurious gcc warning */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) (ssid) = 0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) break; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) } \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) for_each_set_bit(ssid, &__ss_mask, CGROUP_SUBSYS_COUNT) { \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) (ss) = cgroup_subsys[ssid]; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) #define while_each_subsys_mask() \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) } \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) } \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) } while (false)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) /* iterate over child cgrps, lock should be held throughout iteration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) #define cgroup_for_each_live_child(child, cgrp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) list_for_each_entry((child), &(cgrp)->self.children, self.sibling) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) if (({ lockdep_assert_held(&cgroup_mutex); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) cgroup_is_dead(child); })) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) ; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) /* walk live descendants in preorder */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL)) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) if (({ lockdep_assert_held(&cgroup_mutex); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) (dsct) = (d_css)->cgroup; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) cgroup_is_dead(dsct); })) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) ; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) /* walk live descendants in postorder */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) #define cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) css_for_each_descendant_post((d_css), cgroup_css((cgrp), NULL)) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) if (({ lockdep_assert_held(&cgroup_mutex); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) (dsct) = (d_css)->cgroup; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) cgroup_is_dead(dsct); })) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) ; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) * The default css_set - used by init and its children prior to any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) * hierarchies being mounted. It contains a pointer to the root state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) * for each subsystem. Also used to anchor the list of css_sets. Not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) * reference-counted, to improve performance when child cgroups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) * haven't been created.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) struct css_set init_css_set = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) .refcount = REFCOUNT_INIT(1),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) .dom_cset = &init_css_set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) .tasks = LIST_HEAD_INIT(init_css_set.tasks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) .mg_tasks = LIST_HEAD_INIT(init_css_set.mg_tasks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) .dying_tasks = LIST_HEAD_INIT(init_css_set.dying_tasks),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) .task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) .threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) .cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) .mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) * The following field is re-initialized when this cset gets linked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) * in cgroup_init(). However, let's initialize the field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) * statically too so that the default cgroup can be accessed safely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) * early during boot.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) .dfl_cgrp = &cgrp_dfl_root.cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) static int css_set_count = 1; /* 1 for init_css_set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) static bool css_set_threaded(struct css_set *cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) return cset->dom_cset != cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * css_set_populated - does a css_set contain any tasks?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * @cset: target css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) * css_set_populated() should be the same as !!cset->nr_tasks at steady
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * state. However, css_set_populated() can be called while a task is being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * added to or removed from the linked list before the nr_tasks is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) * properly updated. Hence, we can't just look at ->nr_tasks here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) static bool css_set_populated(struct css_set *cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) return !list_empty(&cset->tasks) || !list_empty(&cset->mg_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) * cgroup_update_populated - update the populated count of a cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) * @cgrp: the target cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) * @populated: inc or dec populated count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) * One of the css_sets associated with @cgrp is either getting its first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) * task or losing the last. Update @cgrp->nr_populated_* accordingly. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) * count is propagated towards root so that a given cgroup's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) * nr_populated_children is zero iff none of its descendants contain any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) * tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) * @cgrp's interface file "cgroup.populated" is zero if both
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) * @cgrp->nr_populated_csets and @cgrp->nr_populated_children are zero and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) * 1 otherwise. When the sum changes from or to zero, userland is notified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) * that the content of the interface file has changed. This can be used to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * detect when @cgrp and its descendants become populated or empty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) static void cgroup_update_populated(struct cgroup *cgrp, bool populated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) struct cgroup *child = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) int adj = populated ? 1 : -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) bool was_populated = cgroup_is_populated(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) if (!child) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) cgrp->nr_populated_csets += adj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) if (cgroup_is_threaded(child))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) cgrp->nr_populated_threaded_children += adj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) cgrp->nr_populated_domain_children += adj;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) if (was_populated == cgroup_is_populated(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) cgroup1_check_for_release(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) TRACE_CGROUP_PATH(notify_populated, cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) cgroup_is_populated(cgrp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) cgroup_file_notify(&cgrp->events_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) child = cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) cgrp = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) } while (cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) * css_set_update_populated - update populated state of a css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * @cset: target css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * @populated: whether @cset is populated or depopulated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * @cset is either getting the first task or losing the last. Update the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) * populated counters of all associated cgroups accordingly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) static void css_set_update_populated(struct css_set *cset, bool populated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) list_for_each_entry(link, &cset->cgrp_links, cgrp_link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) cgroup_update_populated(link->cgrp, populated);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) * @task is leaving, advance task iterators which are pointing to it so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) * that they can resume at the next position. Advancing an iterator might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) * remove it from the list, use safe walk. See css_task_iter_skip() for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) * details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) static void css_set_skip_task_iters(struct css_set *cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) struct css_task_iter *it, *pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) list_for_each_entry_safe(it, pos, &cset->task_iters, iters_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) css_task_iter_skip(it, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) * css_set_move_task - move a task from one css_set to another
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) * @task: task being moved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) * @from_cset: css_set @task currently belongs to (may be NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) * @to_cset: new css_set @task is being moved to (may be NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) * @use_mg_tasks: move to @to_cset->mg_tasks instead of ->tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * Move @task from @from_cset to @to_cset. If @task didn't belong to any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * css_set, @from_cset can be NULL. If @task is being disassociated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) * instead of moved, @to_cset can be NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * This function automatically handles populated counter updates and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * css_task_iter adjustments but the caller is responsible for managing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) * @from_cset and @to_cset's reference counts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) static void css_set_move_task(struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) struct css_set *from_cset, struct css_set *to_cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) bool use_mg_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) if (to_cset && !css_set_populated(to_cset))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) css_set_update_populated(to_cset, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (from_cset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) WARN_ON_ONCE(list_empty(&task->cg_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) css_set_skip_task_iters(from_cset, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) list_del_init(&task->cg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) if (!css_set_populated(from_cset))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) css_set_update_populated(from_cset, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) WARN_ON_ONCE(!list_empty(&task->cg_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) if (to_cset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) * We are synchronized through cgroup_threadgroup_rwsem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) * against PF_EXITING setting such that we can't race
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) * against cgroup_exit()/cgroup_free() dropping the css_set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) WARN_ON_ONCE(task->flags & PF_EXITING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) cgroup_move_task(task, to_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) list_add_tail(&task->cg_list, use_mg_tasks ? &to_cset->mg_tasks :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) &to_cset->tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) * hash table for cgroup groups. This improves the performance to find
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) * an existing css_set. This hash doesn't (currently) take into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * account cgroups in empty hierarchies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) #define CSS_SET_HASH_BITS 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) static unsigned long css_set_hash(struct cgroup_subsys_state *css[])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) unsigned long key = 0UL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) for_each_subsys(ss, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) key += (unsigned long)css[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) key = (key >> 16) ^ key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) return key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) void put_css_set_locked(struct css_set *cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) struct cgrp_cset_link *link, *tmp_link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) if (!refcount_dec_and_test(&cset->refcount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) /* This css_set is dead. unlink it and release cgroup and css refs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) for_each_subsys(ss, ssid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) list_del(&cset->e_cset_node[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) css_put(cset->subsys[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) hash_del(&cset->hlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) css_set_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) list_del(&link->cset_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) list_del(&link->cgrp_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) if (cgroup_parent(link->cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) cgroup_put(link->cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) kfree(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) if (css_set_threaded(cset)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) list_del(&cset->threaded_csets_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) put_css_set_locked(cset->dom_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) kfree_rcu(cset, rcu_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) * compare_css_sets - helper function for find_existing_css_set().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) * @cset: candidate css_set being tested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) * @old_cset: existing css_set for a task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) * @new_cgrp: cgroup that's being entered by the task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) * @template: desired set of css pointers in css_set (pre-calculated)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) * Returns true if "cset" matches "old_cset" except for the hierarchy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) * which "new_cgrp" belongs to, for which it should match "new_cgrp".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) static bool compare_css_sets(struct css_set *cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) struct css_set *old_cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) struct cgroup *new_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) struct cgroup_subsys_state *template[])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) struct cgroup *new_dfl_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) struct list_head *l1, *l2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) * On the default hierarchy, there can be csets which are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * associated with the same set of cgroups but different csses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * Let's first ensure that csses match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) if (memcmp(template, cset->subsys, sizeof(cset->subsys)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) /* @cset's domain should match the default cgroup's */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) if (cgroup_on_dfl(new_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) new_dfl_cgrp = new_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) new_dfl_cgrp = old_cset->dfl_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (new_dfl_cgrp->dom_cgrp != cset->dom_cset->dfl_cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) * Compare cgroup pointers in order to distinguish between
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) * different cgroups in hierarchies. As different cgroups may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) * share the same effective css, this comparison is always
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) l1 = &cset->cgrp_links;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) l2 = &old_cset->cgrp_links;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) while (1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) struct cgrp_cset_link *link1, *link2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) struct cgroup *cgrp1, *cgrp2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) l1 = l1->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) l2 = l2->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) /* See if we reached the end - both lists are equal length. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) if (l1 == &cset->cgrp_links) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) BUG_ON(l2 != &old_cset->cgrp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) BUG_ON(l2 == &old_cset->cgrp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) /* Locate the cgroups associated with these links. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) link1 = list_entry(l1, struct cgrp_cset_link, cgrp_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) link2 = list_entry(l2, struct cgrp_cset_link, cgrp_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) cgrp1 = link1->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) cgrp2 = link2->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) /* Hierarchies should be linked in the same order. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) BUG_ON(cgrp1->root != cgrp2->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) * If this hierarchy is the hierarchy of the cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) * that's changing, then we need to check that this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) * css_set points to the new cgroup; if it's any other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) * hierarchy, then this css_set should point to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) * same cgroup as the old css_set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) if (cgrp1->root == new_cgrp->root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) if (cgrp1 != new_cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) if (cgrp1 != cgrp2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) * find_existing_css_set - init css array and find the matching css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) * @old_cset: the css_set that we're using before the cgroup transition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) * @cgrp: the cgroup that we're moving into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) * @template: out param for the new set of csses, should be clear on entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) static struct css_set *find_existing_css_set(struct css_set *old_cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) struct cgroup_subsys_state *template[])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) struct cgroup_root *root = cgrp->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) unsigned long key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * Build the set of subsystem state objects that we want to see in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) * new css_set. while subsystems can change globally, the entries here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) * won't change, so no need for locking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) for_each_subsys(ss, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) if (root->subsys_mask & (1UL << i)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * @ss is in this hierarchy, so we want the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * effective css from @cgrp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) template[i] = cgroup_e_css_by_mask(cgrp, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) * @ss is not in this hierarchy, so we don't want
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) * to change the css.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) template[i] = old_cset->subsys[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) key = css_set_hash(template);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) hash_for_each_possible(css_set_table, cset, hlist, key) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) if (!compare_css_sets(cset, old_cset, cgrp, template))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) /* This css_set matches what we need */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) return cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) /* No existing cgroup group matched */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) static void free_cgrp_cset_links(struct list_head *links_to_free)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) struct cgrp_cset_link *link, *tmp_link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) list_for_each_entry_safe(link, tmp_link, links_to_free, cset_link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) list_del(&link->cset_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) kfree(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) * allocate_cgrp_cset_links - allocate cgrp_cset_links
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) * @count: the number of links to allocate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) * @tmp_links: list_head the allocated links are put on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) * Allocate @count cgrp_cset_link structures and chain them on @tmp_links
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) * through ->cset_link. Returns 0 on success or -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) static int allocate_cgrp_cset_links(int count, struct list_head *tmp_links)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) INIT_LIST_HEAD(tmp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) for (i = 0; i < count; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) link = kzalloc(sizeof(*link), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) if (!link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) free_cgrp_cset_links(tmp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) list_add(&link->cset_link, tmp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) * link_css_set - a helper function to link a css_set to a cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * @tmp_links: cgrp_cset_link objects allocated by allocate_cgrp_cset_links()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * @cset: the css_set to be linked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * @cgrp: the destination cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) BUG_ON(list_empty(tmp_links));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) if (cgroup_on_dfl(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) cset->dfl_cgrp = cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) link = list_first_entry(tmp_links, struct cgrp_cset_link, cset_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) link->cset = cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) link->cgrp = cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) * Always add links to the tail of the lists so that the lists are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) * in choronological order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) list_move_tail(&link->cset_link, &cgrp->cset_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) list_add_tail(&link->cgrp_link, &cset->cgrp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) if (cgroup_parent(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) cgroup_get_live(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * find_css_set - return a new css_set with one cgroup updated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) * @old_cset: the baseline css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * @cgrp: the cgroup to be updated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * Return a new css_set that's equivalent to @old_cset, but with @cgrp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) * substituted into the appropriate hierarchy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) static struct css_set *find_css_set(struct css_set *old_cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) struct cgroup_subsys_state *template[CGROUP_SUBSYS_COUNT] = { };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) struct list_head tmp_links;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) unsigned long key;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) /* First see if we already have a cgroup group that matches
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * the desired set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) cset = find_existing_css_set(old_cset, cgrp, template);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) if (cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) get_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) if (cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) return cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) cset = kzalloc(sizeof(*cset), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) if (!cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) /* Allocate all the cgrp_cset_link objects that we'll need */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) if (allocate_cgrp_cset_links(cgroup_root_count, &tmp_links) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) kfree(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) refcount_set(&cset->refcount, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) cset->dom_cset = cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) INIT_LIST_HEAD(&cset->tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) INIT_LIST_HEAD(&cset->mg_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) INIT_LIST_HEAD(&cset->dying_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) INIT_LIST_HEAD(&cset->task_iters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) INIT_LIST_HEAD(&cset->threaded_csets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) INIT_HLIST_NODE(&cset->hlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) INIT_LIST_HEAD(&cset->cgrp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) INIT_LIST_HEAD(&cset->mg_preload_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) INIT_LIST_HEAD(&cset->mg_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) /* Copy the set of subsystem state objects generated in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) * find_existing_css_set() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) memcpy(cset->subsys, template, sizeof(cset->subsys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /* Add reference counts and links from the new css_set. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) list_for_each_entry(link, &old_cset->cgrp_links, cgrp_link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) struct cgroup *c = link->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) if (c->root == cgrp->root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) c = cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) link_css_set(&tmp_links, cset, c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) BUG_ON(!list_empty(&tmp_links));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) css_set_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) /* Add @cset to the hash table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) key = css_set_hash(cset->subsys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) hash_add(css_set_table, &cset->hlist, key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) for_each_subsys(ss, ssid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) struct cgroup_subsys_state *css = cset->subsys[ssid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) list_add_tail(&cset->e_cset_node[ssid],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) &css->cgroup->e_csets[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) css_get(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) * If @cset should be threaded, look up the matching dom_cset and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) * link them up. We first fully initialize @cset then look for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) * dom_cset. It's simpler this way and safe as @cset is guaranteed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) * to stay empty until we return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) if (cgroup_is_threaded(cset->dfl_cgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) struct css_set *dcset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) dcset = find_css_set(cset, cset->dfl_cgrp->dom_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) if (!dcset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) put_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) cset->dom_cset = dcset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) list_add_tail(&cset->threaded_csets_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) &dcset->threaded_csets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) return cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) struct cgroup_root *cgroup_root_from_kf(struct kernfs_root *kf_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) struct cgroup *root_cgrp = kf_root->kn->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) return root_cgrp->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) static int cgroup_init_root_id(struct cgroup_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) int id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) id = idr_alloc_cyclic(&cgroup_hierarchy_idr, root, 0, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) if (id < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) return id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) root->hierarchy_id = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) static void cgroup_exit_root_id(struct cgroup_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) idr_remove(&cgroup_hierarchy_idr, root->hierarchy_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) void cgroup_free_root(struct cgroup_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) kfree(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) static void cgroup_destroy_root(struct cgroup_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) struct cgroup *cgrp = &root->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) struct cgrp_cset_link *link, *tmp_link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) trace_cgroup_destroy_root(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) BUG_ON(atomic_read(&root->nr_cgrps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) BUG_ON(!list_empty(&cgrp->self.children));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) /* Rebind all subsystems back to the default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) WARN_ON(rebind_subsystems(&cgrp_dfl_root, root->subsys_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) * Release all the links from cset_links to this hierarchy's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) * root cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) list_for_each_entry_safe(link, tmp_link, &cgrp->cset_links, cset_link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) list_del(&link->cset_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) list_del(&link->cgrp_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) kfree(link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) if (!list_empty(&root->root_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) list_del(&root->root_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) cgroup_root_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) cgroup_exit_root_id(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) kernfs_destroy_root(root->kf_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) cgroup_free_root(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) * look up cgroup associated with current task's cgroup namespace on the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) * specified hierarchy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) static struct cgroup *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) current_cgns_cgroup_from_root(struct cgroup_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) struct cgroup *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) cset = current->nsproxy->cgroup_ns->root_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) if (cset == &init_css_set) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) res = &root->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) } else if (root == &cgrp_dfl_root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) res = cset->dfl_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) struct cgroup *c = link->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) if (c->root == root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) res = c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) BUG_ON(!res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) return res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) /* look up cgroup associated with given css_set on the specified hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) static struct cgroup *cset_cgroup_from_root(struct css_set *cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) struct cgroup_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) struct cgroup *res = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) if (cset == &init_css_set) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) res = &root->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) } else if (root == &cgrp_dfl_root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) res = cset->dfl_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) list_for_each_entry(link, &cset->cgrp_links, cgrp_link) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) struct cgroup *c = link->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) if (c->root == root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) res = c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) BUG_ON(!res);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) return res;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) * Return the cgroup for "task" from the given hierarchy. Must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) * called with cgroup_mutex and css_set_lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) struct cgroup *task_cgroup_from_root(struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) struct cgroup_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) * No need to lock the task - since we hold css_set_lock the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) * task can't change groups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) return cset_cgroup_from_root(task_css_set(task), root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) * A task must hold cgroup_mutex to modify cgroups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * Any task can increment and decrement the count field without lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * So in general, code holding cgroup_mutex can't rely on the count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) * field not changing. However, if the count goes to zero, then only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) * cgroup_attach_task() can increment it again. Because a count of zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) * means that no tasks are currently attached, therefore there is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * way a task attached to that cgroup can fork (the other way to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) * increment the count). So code holding cgroup_mutex can safely
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) * assume that if the count is zero, it will stay zero. Similarly, if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) * a task holds cgroup_mutex on a cgroup with zero count, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) * knows that the cgroup won't be removed, as cgroup_rmdir()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) * needs that mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) * A cgroup can only be deleted if both its 'count' of using tasks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) * is zero, and its list of 'children' cgroups is empty. Since all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) * tasks in the system use _some_ cgroup, and since there is always at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) * least one task in the system (init, pid == 1), therefore, root cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) * always has either children cgroups and/or using tasks. So we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) * need a special hack to ensure that root cgroup cannot be deleted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) * P.S. One more locking exception. RCU is used to guard the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) * update of a tasks cgroup pointer by cgroup_attach_task()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) static struct kernfs_syscall_ops cgroup_kf_syscall_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) static char *cgroup_file_name(struct cgroup *cgrp, const struct cftype *cft,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) struct cgroup_subsys *ss = cft->ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) if (cft->ss && !(cft->flags & CFTYPE_NO_PREFIX) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) !(cgrp->root->flags & CGRP_ROOT_NOPREFIX)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) const char *dbg = (cft->flags & CFTYPE_DEBUG) ? ".__DEBUG__." : "";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) snprintf(buf, CGROUP_FILE_NAME_MAX, "%s%s.%s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) dbg, cgroup_on_dfl(cgrp) ? ss->name : ss->legacy_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) cft->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) strscpy(buf, cft->name, CGROUP_FILE_NAME_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) return buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) * cgroup_file_mode - deduce file mode of a control file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) * @cft: the control file in question
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) * S_IRUGO for read, S_IWUSR for write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) static umode_t cgroup_file_mode(const struct cftype *cft)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) umode_t mode = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) if (cft->read_u64 || cft->read_s64 || cft->seq_show)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) mode |= S_IRUGO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) if (cft->write_u64 || cft->write_s64 || cft->write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) if (cft->flags & CFTYPE_WORLD_WRITABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) mode |= S_IWUGO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) mode |= S_IWUSR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) return mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) * cgroup_calc_subtree_ss_mask - calculate subtree_ss_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) * @subtree_control: the new subtree_control mask to consider
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) * @this_ss_mask: available subsystems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) * On the default hierarchy, a subsystem may request other subsystems to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) * enabled together through its ->depends_on mask. In such cases, more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) * subsystems than specified in "cgroup.subtree_control" may be enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) * This function calculates which subsystems need to be enabled if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) * @subtree_control is to be applied while restricted to @this_ss_mask.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) static u16 cgroup_calc_subtree_ss_mask(u16 subtree_control, u16 this_ss_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) u16 cur_ss_mask = subtree_control;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) cur_ss_mask |= cgrp_dfl_implicit_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) u16 new_ss_mask = cur_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) do_each_subsys_mask(ss, ssid, cur_ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) new_ss_mask |= ss->depends_on;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) * Mask out subsystems which aren't available. This can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) * happen only if some depended-upon subsystems were bound
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) * to non-default hierarchies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) new_ss_mask &= this_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) if (new_ss_mask == cur_ss_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) cur_ss_mask = new_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) return cur_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) * cgroup_kn_unlock - unlocking helper for cgroup kernfs methods
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) * @kn: the kernfs_node being serviced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) * This helper undoes cgroup_kn_lock_live() and should be invoked before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) * the method finishes if locking succeeded. Note that once this function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) * returns the cgroup returned by cgroup_kn_lock_live() may become
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) * inaccessible any time. If the caller intends to continue to access the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) * cgroup, it should pin it before invoking this function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) void cgroup_kn_unlock(struct kernfs_node *kn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) if (kernfs_type(kn) == KERNFS_DIR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) cgrp = kn->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) cgrp = kn->parent->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) kernfs_unbreak_active_protection(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) cgroup_put(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) * cgroup_kn_lock_live - locking helper for cgroup kernfs methods
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) * @kn: the kernfs_node being serviced
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) * @drain_offline: perform offline draining on the cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) * This helper is to be used by a cgroup kernfs method currently servicing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) * @kn. It breaks the active protection, performs cgroup locking and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) * verifies that the associated cgroup is alive. Returns the cgroup if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) * alive; otherwise, %NULL. A successful return should be undone by a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) * matching cgroup_kn_unlock() invocation. If @drain_offline is %true, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) * cgroup is drained of offlining csses before return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) * Any cgroup kernfs method implementation which requires locking the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) * associated cgroup should use this helper. It avoids nesting cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) * locking under kernfs active protection and allows all kernfs operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) * including self-removal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) struct cgroup *cgroup_kn_lock_live(struct kernfs_node *kn, bool drain_offline)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) if (kernfs_type(kn) == KERNFS_DIR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) cgrp = kn->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) cgrp = kn->parent->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) * We're gonna grab cgroup_mutex which nests outside kernfs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) * active_ref. cgroup liveliness check alone provides enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) * protection against removal. Ensure @cgrp stays accessible and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) * break the active_ref protection.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) if (!cgroup_tryget(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) kernfs_break_active_protection(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) if (drain_offline)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) cgroup_lock_and_drain_offline(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) if (!cgroup_is_dead(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) return cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) cgroup_kn_unlock(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) static void cgroup_rm_file(struct cgroup *cgrp, const struct cftype *cft)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) char name[CGROUP_FILE_NAME_MAX];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) if (cft->file_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) struct cgroup_subsys_state *css = cgroup_css(cgrp, cft->ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) struct cgroup_file *cfile = (void *)css + cft->file_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) spin_lock_irq(&cgroup_file_kn_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) cfile->kn = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) spin_unlock_irq(&cgroup_file_kn_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) del_timer_sync(&cfile->notify_timer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) kernfs_remove_by_name(cgrp->kn, cgroup_file_name(cgrp, cft, name));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) * css_clear_dir - remove subsys files in a cgroup directory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) * @css: taget css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) static void css_clear_dir(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) struct cgroup *cgrp = css->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) struct cftype *cfts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) if (!(css->flags & CSS_VISIBLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) css->flags &= ~CSS_VISIBLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) if (!css->ss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) if (cgroup_on_dfl(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) cfts = cgroup_base_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) cfts = cgroup1_base_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) cgroup_addrm_files(css, cgrp, cfts, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) list_for_each_entry(cfts, &css->ss->cfts, node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) cgroup_addrm_files(css, cgrp, cfts, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) * css_populate_dir - create subsys files in a cgroup directory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) * @css: target css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) * On failure, no file is added.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) static int css_populate_dir(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) struct cgroup *cgrp = css->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) struct cftype *cfts, *failed_cfts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) if ((css->flags & CSS_VISIBLE) || !cgrp->kn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) if (!css->ss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) if (cgroup_on_dfl(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) cfts = cgroup_base_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) cfts = cgroup1_base_files;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) ret = cgroup_addrm_files(&cgrp->self, cgrp, cfts, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) list_for_each_entry(cfts, &css->ss->cfts, node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) ret = cgroup_addrm_files(css, cgrp, cfts, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) failed_cfts = cfts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) css->flags |= CSS_VISIBLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) list_for_each_entry(cfts, &css->ss->cfts, node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) if (cfts == failed_cfts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) cgroup_addrm_files(css, cgrp, cfts, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) struct cgroup *dcgrp = &dst_root->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) int ssid, i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) u16 dfl_disable_ss_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) do_each_subsys_mask(ss, ssid, ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) * If @ss has non-root csses attached to it, can't move.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) * If @ss is an implicit controller, it is exempt from this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) * rule and can be stolen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) if (css_next_child(NULL, cgroup_css(&ss->root->cgrp, ss)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) !ss->implicit_on_dfl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) /* can't move between two non-dummy roots either */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) if (ss->root != &cgrp_dfl_root && dst_root != &cgrp_dfl_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) * Collect ssid's that need to be disabled from default
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) * hierarchy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) if (ss->root == &cgrp_dfl_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) dfl_disable_ss_mask |= 1 << ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) if (dfl_disable_ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) struct cgroup *scgrp = &cgrp_dfl_root.cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) * Controllers from default hierarchy that need to be rebound
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) * are all disabled together in one go.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) cgrp_dfl_root.subsys_mask &= ~dfl_disable_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) WARN_ON(cgroup_apply_control(scgrp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) cgroup_finalize_control(scgrp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) do_each_subsys_mask(ss, ssid, ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) struct cgroup_root *src_root = ss->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) struct cgroup *scgrp = &src_root->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) struct cgroup_subsys_state *css = cgroup_css(scgrp, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) WARN_ON(!css || cgroup_css(dcgrp, ss));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) if (src_root != &cgrp_dfl_root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) /* disable from the source */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) src_root->subsys_mask &= ~(1 << ssid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) WARN_ON(cgroup_apply_control(scgrp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) cgroup_finalize_control(scgrp, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) /* rebind */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) RCU_INIT_POINTER(scgrp->subsys[ssid], NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) rcu_assign_pointer(dcgrp->subsys[ssid], css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) ss->root = dst_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) css->cgroup = dcgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) hash_for_each(css_set_table, i, cset, hlist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) list_move_tail(&cset->e_cset_node[ss->id],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) &dcgrp->e_csets[ss->id]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) /* default hierarchy doesn't enable controllers by default */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) dst_root->subsys_mask |= 1 << ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) if (dst_root == &cgrp_dfl_root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) static_branch_enable(cgroup_subsys_on_dfl_key[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) dcgrp->subtree_control |= 1 << ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) static_branch_disable(cgroup_subsys_on_dfl_key[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) ret = cgroup_apply_control(dcgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) pr_warn("partial failure to rebind %s controller (err=%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) ss->name, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) if (ss->bind)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) ss->bind(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) kernfs_activate(dcgrp->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) int cgroup_show_path(struct seq_file *sf, struct kernfs_node *kf_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) struct kernfs_root *kf_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) int len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) char *buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) struct cgroup_root *kf_cgroot = cgroup_root_from_kf(kf_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) struct cgroup *ns_cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) buf = kmalloc(PATH_MAX, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) if (!buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) ns_cgroup = current_cgns_cgroup_from_root(kf_cgroot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) len = kernfs_path_from_node(kf_node, ns_cgroup->kn, buf, PATH_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) if (len >= PATH_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) len = -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) else if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) seq_escape(sf, buf, " \t\n\\");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) enum cgroup2_param {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) Opt_nsdelegate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) Opt_memory_localevents,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) Opt_memory_recursiveprot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) nr__cgroup2_params
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) static const struct fs_parameter_spec cgroup2_fs_parameters[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) fsparam_flag("nsdelegate", Opt_nsdelegate),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) fsparam_flag("memory_localevents", Opt_memory_localevents),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) fsparam_flag("memory_recursiveprot", Opt_memory_recursiveprot),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) static int cgroup2_parse_param(struct fs_context *fc, struct fs_parameter *param)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) struct fs_parse_result result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) int opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) opt = fs_parse(fc, cgroup2_fs_parameters, param, &result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) if (opt < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) return opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) switch (opt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) case Opt_nsdelegate:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) ctx->flags |= CGRP_ROOT_NS_DELEGATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) case Opt_memory_localevents:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) ctx->flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) case Opt_memory_recursiveprot:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) ctx->flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) static void apply_cgroup_root_flags(unsigned int root_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) if (current->nsproxy->cgroup_ns == &init_cgroup_ns) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) if (root_flags & CGRP_ROOT_NS_DELEGATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) cgrp_dfl_root.flags |= CGRP_ROOT_NS_DELEGATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) cgrp_dfl_root.flags &= ~CGRP_ROOT_NS_DELEGATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) if (root_flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_LOCAL_EVENTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_LOCAL_EVENTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) if (root_flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) cgrp_dfl_root.flags |= CGRP_ROOT_MEMORY_RECURSIVE_PROT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) cgrp_dfl_root.flags &= ~CGRP_ROOT_MEMORY_RECURSIVE_PROT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) static int cgroup_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) if (cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) seq_puts(seq, ",nsdelegate");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_LOCAL_EVENTS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) seq_puts(seq, ",memory_localevents");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) if (cgrp_dfl_root.flags & CGRP_ROOT_MEMORY_RECURSIVE_PROT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) seq_puts(seq, ",memory_recursiveprot");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) static int cgroup_reconfigure(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) apply_cgroup_root_flags(ctx->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) static void init_cgroup_housekeeping(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) INIT_LIST_HEAD(&cgrp->self.sibling);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) INIT_LIST_HEAD(&cgrp->self.children);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) INIT_LIST_HEAD(&cgrp->cset_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) INIT_LIST_HEAD(&cgrp->pidlists);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) mutex_init(&cgrp->pidlist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) cgrp->self.cgroup = cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) cgrp->self.flags |= CSS_ONLINE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) cgrp->dom_cgrp = cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) cgrp->max_descendants = INT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) cgrp->max_depth = INT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) INIT_LIST_HEAD(&cgrp->rstat_css_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) prev_cputime_init(&cgrp->prev_cputime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) for_each_subsys(ss, ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) INIT_LIST_HEAD(&cgrp->e_csets[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) init_waitqueue_head(&cgrp->offline_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) INIT_WORK(&cgrp->release_agent_work, cgroup1_release_agent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) void init_cgroup_root(struct cgroup_fs_context *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) struct cgroup_root *root = ctx->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) struct cgroup *cgrp = &root->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) INIT_LIST_HEAD(&root->root_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) atomic_set(&root->nr_cgrps, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) cgrp->root = root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) init_cgroup_housekeeping(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) root->flags = ctx->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) if (ctx->release_agent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) strscpy(root->release_agent_path, ctx->release_agent, PATH_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) if (ctx->name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) strscpy(root->name, ctx->name, MAX_CGROUP_ROOT_NAMELEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) if (ctx->cpuset_clone_children)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) LIST_HEAD(tmp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) struct cgroup *root_cgrp = &root->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) struct kernfs_syscall_ops *kf_sops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) * We're accessing css_set_count without locking css_set_lock here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) * but that's OK - it can only be increased by someone holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) * cgroup_lock, and that's us. Later rebinding may disable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) * controllers on the default hierarchy and thus create new csets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) * which can't be more than the existing ones. Allocate 2x.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) ret = allocate_cgrp_cset_links(2 * css_set_count, &tmp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) goto cancel_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) ret = cgroup_init_root_id(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) goto cancel_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) kf_sops = root == &cgrp_dfl_root ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) &cgroup_kf_syscall_ops : &cgroup1_kf_syscall_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) root->kf_root = kernfs_create_root(kf_sops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) KERNFS_ROOT_CREATE_DEACTIVATED |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) KERNFS_ROOT_SUPPORT_EXPORTOP |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) KERNFS_ROOT_SUPPORT_USER_XATTR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) root_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) if (IS_ERR(root->kf_root)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) ret = PTR_ERR(root->kf_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) goto exit_root_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) root_cgrp->kn = root->kf_root->kn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) WARN_ON_ONCE(cgroup_ino(root_cgrp) != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) root_cgrp->ancestor_ids[0] = cgroup_id(root_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) ret = css_populate_dir(&root_cgrp->self);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) goto destroy_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) ret = rebind_subsystems(root, ss_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) goto destroy_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) ret = cgroup_bpf_inherit(root_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) WARN_ON_ONCE(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) trace_cgroup_setup_root(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) * There must be no failure case after here, since rebinding takes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) * care of subsystems' refcounts, which are explicitly dropped in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) * the failure exit path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) list_add(&root->root_list, &cgroup_roots);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) cgroup_root_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) * Link the root cgroup in this hierarchy into all the css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) * objects.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) hash_for_each(css_set_table, i, cset, hlist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) link_css_set(&tmp_links, cset, root_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) if (css_set_populated(cset))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) cgroup_update_populated(root_cgrp, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) BUG_ON(!list_empty(&root_cgrp->self.children));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) BUG_ON(atomic_read(&root->nr_cgrps) != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) destroy_root:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) kernfs_destroy_root(root->kf_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) root->kf_root = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) exit_root_id:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) cgroup_exit_root_id(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) cancel_ref:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) percpu_ref_exit(&root_cgrp->self.refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) free_cgrp_cset_links(&tmp_links);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) int cgroup_do_get_tree(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) ctx->kfc.root = ctx->root->kf_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) if (fc->fs_type == &cgroup2_fs_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) ctx->kfc.magic = CGROUP2_SUPER_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) ctx->kfc.magic = CGROUP_SUPER_MAGIC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) ret = kernfs_get_tree(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) * In non-init cgroup namespace, instead of root cgroup's dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) * we return the dentry corresponding to the cgroupns->root_cgrp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) if (!ret && ctx->ns != &init_cgroup_ns) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) struct dentry *nsdentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) struct super_block *sb = fc->root->d_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) cgrp = cset_cgroup_from_root(ctx->ns->root_cset, ctx->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) nsdentry = kernfs_node_dentry(cgrp->kn, sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) dput(fc->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) if (IS_ERR(nsdentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) deactivate_locked_super(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) ret = PTR_ERR(nsdentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) nsdentry = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) fc->root = nsdentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) if (!ctx->kfc.new_sb_created)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) cgroup_put(&ctx->root->cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) * Destroy a cgroup filesystem context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) static void cgroup_fs_context_free(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) kfree(ctx->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) kfree(ctx->release_agent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) put_cgroup_ns(ctx->ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) kernfs_free_fs_context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) static int cgroup_get_tree(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) cgrp_dfl_visible = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) cgroup_get_live(&cgrp_dfl_root.cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) ctx->root = &cgrp_dfl_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) ret = cgroup_do_get_tree(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) apply_cgroup_root_flags(ctx->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) static const struct fs_context_operations cgroup_fs_context_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) .free = cgroup_fs_context_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) .parse_param = cgroup2_parse_param,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) .get_tree = cgroup_get_tree,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) .reconfigure = cgroup_reconfigure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) static const struct fs_context_operations cgroup1_fs_context_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) .free = cgroup_fs_context_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) .parse_param = cgroup1_parse_param,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) .get_tree = cgroup1_get_tree,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) .reconfigure = cgroup1_reconfigure,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) * Initialise the cgroup filesystem creation/reconfiguration context. Notably,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) * we select the namespace we're going to use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) static int cgroup_init_fs_context(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) struct cgroup_fs_context *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) ctx = kzalloc(sizeof(struct cgroup_fs_context), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) ctx->ns = current->nsproxy->cgroup_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) get_cgroup_ns(ctx->ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) fc->fs_private = &ctx->kfc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) if (fc->fs_type == &cgroup2_fs_type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) fc->ops = &cgroup_fs_context_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) fc->ops = &cgroup1_fs_context_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) put_user_ns(fc->user_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) fc->user_ns = get_user_ns(ctx->ns->user_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) fc->global = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) static void cgroup_kill_sb(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) struct kernfs_root *kf_root = kernfs_root_from_sb(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) struct cgroup_root *root = cgroup_root_from_kf(kf_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) * If @root doesn't have any children, start killing it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) * This prevents new mounts by disabling percpu_ref_tryget_live().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) * cgroup_mount() may wait for @root's release.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) * And don't kill the default root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) if (list_empty(&root->cgrp.self.children) && root != &cgrp_dfl_root &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) !percpu_ref_is_dying(&root->cgrp.self.refcnt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) cgroup_bpf_offline(&root->cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) percpu_ref_kill(&root->cgrp.self.refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) cgroup_put(&root->cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) kernfs_kill_sb(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) struct file_system_type cgroup_fs_type = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) .name = "cgroup",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) .init_fs_context = cgroup_init_fs_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) .parameters = cgroup1_fs_parameters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) .kill_sb = cgroup_kill_sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) .fs_flags = FS_USERNS_MOUNT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) static struct file_system_type cgroup2_fs_type = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) .name = "cgroup2",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) .init_fs_context = cgroup_init_fs_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) .parameters = cgroup2_fs_parameters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) .kill_sb = cgroup_kill_sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) .fs_flags = FS_USERNS_MOUNT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) #ifdef CONFIG_CPUSETS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) static const struct fs_context_operations cpuset_fs_context_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) .get_tree = cgroup1_get_tree,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) .free = cgroup_fs_context_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) * This is ugly, but preserves the userspace API for existing cpuset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * users. If someone tries to mount the "cpuset" filesystem, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) * silently switch it to mount "cgroup" instead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) static int cpuset_init_fs_context(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) char *agent = kstrdup("/sbin/cpuset_release_agent", GFP_USER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) struct cgroup_fs_context *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) err = cgroup_init_fs_context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) kfree(agent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) fc->ops = &cpuset_fs_context_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) ctx->subsys_mask = 1 << cpuset_cgrp_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) ctx->flags |= CGRP_ROOT_NOPREFIX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) ctx->release_agent = agent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) get_filesystem(&cgroup_fs_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) put_filesystem(fc->fs_type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) fc->fs_type = &cgroup_fs_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) static struct file_system_type cpuset_fs_type = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) .name = "cpuset",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) .init_fs_context = cpuset_init_fs_context,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) .fs_flags = FS_USERNS_MOUNT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, size_t buflen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) struct cgroup_namespace *ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) struct cgroup *root = cset_cgroup_from_root(ns->root_cset, cgrp->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) return kernfs_path_from_node(cgrp->kn, root->kn, buf, buflen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) struct cgroup_namespace *ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) ret = cgroup_path_ns_locked(cgrp, buf, buflen, ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) EXPORT_SYMBOL_GPL(cgroup_path_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) * task_cgroup_path - cgroup path of a task in the first cgroup hierarchy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) * @task: target task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) * @buf: the buffer to write the path into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) * @buflen: the length of the buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) * Determine @task's cgroup on the first (the one with the lowest non-zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) * hierarchy_id) cgroup hierarchy and copy its path into @buf. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) * function grabs cgroup_mutex and shouldn't be used inside locks used by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) * cgroup controller callbacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) * Return value is the same as kernfs_path().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) int task_cgroup_path(struct task_struct *task, char *buf, size_t buflen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) struct cgroup_root *root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) int hierarchy_id = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) root = idr_get_next(&cgroup_hierarchy_idr, &hierarchy_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) if (root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) cgrp = task_cgroup_from_root(task, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) ret = cgroup_path_ns_locked(cgrp, buf, buflen, &init_cgroup_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) /* if no hierarchy exists, everyone is in "/" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) ret = strlcpy(buf, "/", buflen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) EXPORT_SYMBOL_GPL(task_cgroup_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) * cgroup_migrate_add_task - add a migration target task to a migration context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) * @task: target task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) * @mgctx: target migration context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) * Add @task, which is a migration target, to @mgctx->tset. This function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) * becomes noop if @task doesn't need to be migrated. @task's css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) * should have been added as a migration source and @task->cg_list will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) * moved from the css_set's tasks list to mg_tasks one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) static void cgroup_migrate_add_task(struct task_struct *task,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) struct cgroup_mgctx *mgctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) /* @task either already exited or can't exit until the end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) if (task->flags & PF_EXITING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) /* cgroup_threadgroup_rwsem protects racing against forks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) WARN_ON_ONCE(list_empty(&task->cg_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) cset = task_css_set(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) if (!cset->mg_src_cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) mgctx->tset.nr_tasks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) list_move_tail(&task->cg_list, &cset->mg_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) if (list_empty(&cset->mg_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) list_add_tail(&cset->mg_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) &mgctx->tset.src_csets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) if (list_empty(&cset->mg_dst_cset->mg_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) list_add_tail(&cset->mg_dst_cset->mg_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) &mgctx->tset.dst_csets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) * cgroup_taskset_first - reset taskset and return the first task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) * @tset: taskset of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) * @dst_cssp: output variable for the destination css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) * @tset iteration is initialized and the first task is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) struct task_struct *cgroup_taskset_first(struct cgroup_taskset *tset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) struct cgroup_subsys_state **dst_cssp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) tset->cur_cset = list_first_entry(tset->csets, struct css_set, mg_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) tset->cur_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) return cgroup_taskset_next(tset, dst_cssp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) EXPORT_SYMBOL_GPL(cgroup_taskset_first);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) * cgroup_taskset_next - iterate to the next task in taskset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) * @tset: taskset of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) * @dst_cssp: output variable for the destination css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) * Return the next task in @tset. Iteration must have been initialized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) * with cgroup_taskset_first().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) struct task_struct *cgroup_taskset_next(struct cgroup_taskset *tset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) struct cgroup_subsys_state **dst_cssp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) struct css_set *cset = tset->cur_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) struct task_struct *task = tset->cur_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) while (&cset->mg_node != tset->csets) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) if (!task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) task = list_first_entry(&cset->mg_tasks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) struct task_struct, cg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) task = list_next_entry(task, cg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) if (&task->cg_list != &cset->mg_tasks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) tset->cur_cset = cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) tset->cur_task = task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) * This function may be called both before and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) * after cgroup_taskset_migrate(). The two cases
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) * can be distinguished by looking at whether @cset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) * has its ->mg_dst_cset set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) if (cset->mg_dst_cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) *dst_cssp = cset->mg_dst_cset->subsys[tset->ssid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) *dst_cssp = cset->subsys[tset->ssid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) return task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) cset = list_next_entry(cset, mg_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) EXPORT_SYMBOL_GPL(cgroup_taskset_next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) * cgroup_taskset_migrate - migrate a taskset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) * @mgctx: migration context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) * Migrate tasks in @mgctx as setup by migration preparation functions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) * This function fails iff one of the ->can_attach callbacks fails and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) * guarantees that either all or none of the tasks in @mgctx are migrated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) * @mgctx is consumed regardless of success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) static int cgroup_migrate_execute(struct cgroup_mgctx *mgctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) struct cgroup_taskset *tset = &mgctx->tset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) struct task_struct *task, *tmp_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) struct css_set *cset, *tmp_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) int ssid, failed_ssid, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) /* check that we can legitimately attach to the cgroup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) if (tset->nr_tasks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) if (ss->can_attach) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) tset->ssid = ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) ret = ss->can_attach(tset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) failed_ssid = ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) goto out_cancel_attach;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) * Now that we're guaranteed success, proceed to move all tasks to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) * the new cgroup. There are no failure cases after here, so this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) * is the commit point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) list_for_each_entry(cset, &tset->src_csets, mg_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) list_for_each_entry_safe(task, tmp_task, &cset->mg_tasks, cg_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) struct css_set *from_cset = task_css_set(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) struct css_set *to_cset = cset->mg_dst_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) get_css_set(to_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) to_cset->nr_tasks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) css_set_move_task(task, from_cset, to_cset, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) from_cset->nr_tasks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) * If the source or destination cgroup is frozen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) * the task might require to change its state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) cgroup_freezer_migrate_task(task, from_cset->dfl_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) to_cset->dfl_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) put_css_set_locked(from_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) * Migration is committed, all target tasks are now on dst_csets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) * Nothing is sensitive to fork() after this point. Notify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) * controllers that migration is complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) tset->csets = &tset->dst_csets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) if (tset->nr_tasks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) if (ss->attach) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) tset->ssid = ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) trace_android_vh_cgroup_attach(ss, tset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) ss->attach(tset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) goto out_release_tset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) out_cancel_attach:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) if (tset->nr_tasks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) do_each_subsys_mask(ss, ssid, mgctx->ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) if (ssid == failed_ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) if (ss->cancel_attach) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) tset->ssid = ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) ss->cancel_attach(tset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) out_release_tset:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) list_splice_init(&tset->dst_csets, &tset->src_csets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) list_for_each_entry_safe(cset, tmp_cset, &tset->src_csets, mg_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) list_splice_tail_init(&cset->mg_tasks, &cset->tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) list_del_init(&cset->mg_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) * Re-initialize the cgroup_taskset structure in case it is reused
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) * again in another cgroup_migrate_add_task()/cgroup_migrate_execute()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) * iteration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) tset->nr_tasks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) tset->csets = &tset->src_csets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) * cgroup_migrate_vet_dst - verify whether a cgroup can be migration destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) * @dst_cgrp: destination cgroup to test
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) * On the default hierarchy, except for the mixable, (possible) thread root
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) * and threaded cgroups, subtree_control must be zero for migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) * destination cgroups with tasks so that child cgroups don't compete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) * against tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) int cgroup_migrate_vet_dst(struct cgroup *dst_cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) /* v1 doesn't have any restriction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) if (!cgroup_on_dfl(dst_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) /* verify @dst_cgrp can host resources */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) if (!cgroup_is_valid_domain(dst_cgrp->dom_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) /* mixables don't care */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) if (cgroup_is_mixable(dst_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) * If @dst_cgrp is already or can become a thread root or is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) * threaded, it doesn't matter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) if (cgroup_can_be_thread_root(dst_cgrp) || cgroup_is_threaded(dst_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) /* apply no-internal-process constraint */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) if (dst_cgrp->subtree_control)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) * cgroup_migrate_finish - cleanup after attach
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) * @mgctx: migration context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) * Undo cgroup_migrate_add_src() and cgroup_migrate_prepare_dst(). See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) * those functions for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) LIST_HEAD(preloaded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) struct css_set *cset, *tmp_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) cset->mg_src_cgrp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) cset->mg_dst_cgrp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) cset->mg_dst_cset = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) list_del_init(&cset->mg_preload_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) put_css_set_locked(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) * cgroup_migrate_add_src - add a migration source css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) * @src_cset: the source css_set to add
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) * @dst_cgrp: the destination cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) * @mgctx: migration context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) * Tasks belonging to @src_cset are about to be migrated to @dst_cgrp. Pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) * @src_cset and add it to @mgctx->src_csets, which should later be cleaned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) * up by cgroup_migrate_finish().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) * This function may be called without holding cgroup_threadgroup_rwsem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) * even if the target is a process. Threads may be created and destroyed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) * but as long as cgroup_mutex is not dropped, no new css_set can be put
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) * into play and the preloaded css_sets are guaranteed to cover all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) * migrations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) void cgroup_migrate_add_src(struct css_set *src_cset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) struct cgroup *dst_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) struct cgroup_mgctx *mgctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) struct cgroup *src_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) * If ->dead, @src_set is associated with one or more dead cgroups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) * and doesn't contain any migratable tasks. Ignore it early so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) * that the rest of migration path doesn't get confused by it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) if (src_cset->dead)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) if (!list_empty(&src_cset->mg_preload_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) WARN_ON(src_cset->mg_src_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) WARN_ON(src_cset->mg_dst_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) WARN_ON(!list_empty(&src_cset->mg_tasks));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) WARN_ON(!list_empty(&src_cset->mg_node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) src_cset->mg_src_cgrp = src_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) src_cset->mg_dst_cgrp = dst_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) get_css_set(src_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) * cgroup_migrate_prepare_dst - prepare destination css_sets for migration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) * @mgctx: migration context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) * Tasks are about to be moved and all the source css_sets have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) * preloaded to @mgctx->preloaded_src_csets. This function looks up and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) * pins all destination css_sets, links each to its source, and append them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) * to @mgctx->preloaded_dst_csets.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) * This function must be called after cgroup_migrate_add_src() has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) * called on each migration source css_set. After migration is performed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) * using cgroup_migrate(), cgroup_migrate_finish() must be called on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) * @mgctx.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) int cgroup_migrate_prepare_dst(struct cgroup_mgctx *mgctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) struct css_set *src_cset, *tmp_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) /* look up the dst cset for each src cset and link it to src */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) mg_preload_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) struct css_set *dst_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) dst_cset = find_css_set(src_cset, src_cset->mg_dst_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) if (!dst_cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) WARN_ON_ONCE(src_cset->mg_dst_cset || dst_cset->mg_dst_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) * If src cset equals dst, it's noop. Drop the src.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) * cgroup_migrate() will skip the cset too. Note that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) * can't handle src == dst as some nodes are used by both.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) if (src_cset == dst_cset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) src_cset->mg_src_cgrp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) src_cset->mg_dst_cgrp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) list_del_init(&src_cset->mg_preload_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) put_css_set(src_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) put_css_set(dst_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) src_cset->mg_dst_cset = dst_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) if (list_empty(&dst_cset->mg_preload_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) list_add_tail(&dst_cset->mg_preload_node,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) &mgctx->preloaded_dst_csets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) put_css_set(dst_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) for_each_subsys(ss, ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) if (src_cset->subsys[ssid] != dst_cset->subsys[ssid])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) mgctx->ss_mask |= 1 << ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) * cgroup_migrate - migrate a process or task to a cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) * @leader: the leader of the process or the task to migrate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) * @threadgroup: whether @leader points to the whole process or a single task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) * @mgctx: migration context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) * Migrate a process or task denoted by @leader. If migrating a process,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) * the caller must be holding cgroup_threadgroup_rwsem. The caller is also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) * responsible for invoking cgroup_migrate_add_src() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) * cgroup_migrate_prepare_dst() on the targets before invoking this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) * function and following up with cgroup_migrate_finish().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) * As long as a controller's ->can_attach() doesn't fail, this function is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) * guaranteed to succeed. This means that, excluding ->can_attach()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) * failure, when migrating multiple targets, the success or failure can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) * decided for all targets by invoking group_migrate_prepare_dst() before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) * actually starting migrating.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) int cgroup_migrate(struct task_struct *leader, bool threadgroup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) struct cgroup_mgctx *mgctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) * Prevent freeing of tasks while we take a snapshot. Tasks that are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) * already PF_EXITING could be freed from underneath us unless we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) * take an rcu_read_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) task = leader;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) cgroup_migrate_add_task(task, mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) if (!threadgroup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) } while_each_thread(leader, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) return cgroup_migrate_execute(mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) * cgroup_attach_task - attach a task or a whole threadgroup to a cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) * @dst_cgrp: the cgroup to attach to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) * @leader: the task or the leader of the threadgroup to be attached
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) * @threadgroup: attach the whole threadgroup?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) * Call holding cgroup_mutex and cgroup_threadgroup_rwsem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) int cgroup_attach_task(struct cgroup *dst_cgrp, struct task_struct *leader,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) bool threadgroup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) DEFINE_CGROUP_MGCTX(mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) /* look up all src csets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) task = leader;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) cgroup_migrate_add_src(task_css_set(task), dst_cgrp, &mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) if (!threadgroup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) } while_each_thread(leader, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) /* prepare dst csets and commit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) ret = cgroup_migrate_prepare_dst(&mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) ret = cgroup_migrate(leader, threadgroup, &mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) cgroup_migrate_finish(&mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) TRACE_CGROUP_PATH(attach_task, dst_cgrp, leader, threadgroup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) struct task_struct *cgroup_procs_write_start(char *buf, bool threadgroup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) bool *locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) struct cgroup *dst_cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) __acquires(&cgroup_threadgroup_rwsem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) pid_t pid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) bool force_migration = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) * If we migrate a single thread, we don't care about threadgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) * stability. If the thread is `current`, it won't exit(2) under our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) * hands or change PID through exec(2). We exclude
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) * cgroup_update_dfl_csses and other cgroup_{proc,thread}s_write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) * callers by cgroup_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) * Therefore, we can skip the global lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) if (pid || threadgroup) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) percpu_down_write(&cgroup_threadgroup_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) *locked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) *locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) if (pid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) tsk = find_task_by_vpid(pid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) if (!tsk) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) tsk = ERR_PTR(-ESRCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) goto out_unlock_threadgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) tsk = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) if (threadgroup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) tsk = tsk->group_leader;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) if (tsk->flags & PF_KTHREAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) trace_android_rvh_cgroup_force_kthread_migration(tsk, dst_cgrp, &force_migration);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) * kthreads may acquire PF_NO_SETAFFINITY during initialization.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) * If userland migrates such a kthread to a non-root cgroup, it can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) * become trapped in a cpuset, or RT kthread may be born in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) * cgroup with no rt_runtime allocated. Just say no.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) if (!force_migration && (tsk->no_cgroup_migration || (tsk->flags & PF_NO_SETAFFINITY))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) tsk = ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) goto out_unlock_threadgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) get_task_struct(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) goto out_unlock_rcu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) out_unlock_threadgroup:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) if (*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) percpu_up_write(&cgroup_threadgroup_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) *locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) out_unlock_rcu:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) return tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) void cgroup_procs_write_finish(struct task_struct *task, bool locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) __releases(&cgroup_threadgroup_rwsem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) /* release reference from cgroup_procs_write_start() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) put_task_struct(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) percpu_up_write(&cgroup_threadgroup_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) for_each_subsys(ss, ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) if (ss->post_attach)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) ss->post_attach();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) static void cgroup_print_ss_mask(struct seq_file *seq, u16 ss_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) bool printed = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) do_each_subsys_mask(ss, ssid, ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) if (printed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) seq_putc(seq, ' ');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) seq_puts(seq, ss->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) printed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) if (printed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) seq_putc(seq, '\n');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) /* show controllers which are enabled from the parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) static int cgroup_controllers_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) cgroup_print_ss_mask(seq, cgroup_control(cgrp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) /* show controllers which are enabled for a given cgroup's children */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) static int cgroup_subtree_control_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) cgroup_print_ss_mask(seq, cgrp->subtree_control);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) * cgroup_update_dfl_csses - update css assoc of a subtree in default hierarchy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) * @cgrp: root of the subtree to update csses for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) * @cgrp's control masks have changed and its subtree's css associations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) * need to be updated accordingly. This function looks up all css_sets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) * which are attached to the subtree, creates the matching updated css_sets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) * and migrates the tasks to the new ones.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) static int cgroup_update_dfl_csses(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) DEFINE_CGROUP_MGCTX(mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) struct cgroup_subsys_state *d_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) struct cgroup *dsct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) struct css_set *src_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) percpu_down_write(&cgroup_threadgroup_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) /* look up all csses currently attached to @cgrp's subtree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) list_for_each_entry(link, &dsct->cset_links, cset_link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) cgroup_migrate_add_src(link->cset, dsct, &mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) /* NULL dst indicates self on default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) ret = cgroup_migrate_prepare_dst(&mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) struct task_struct *task, *ntask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) /* all tasks in src_csets need to be migrated */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) list_for_each_entry_safe(task, ntask, &src_cset->tasks, cg_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) cgroup_migrate_add_task(task, &mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) ret = cgroup_migrate_execute(&mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) cgroup_migrate_finish(&mgctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) percpu_up_write(&cgroup_threadgroup_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) * cgroup_lock_and_drain_offline - lock cgroup_mutex and drain offlined csses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) * @cgrp: root of the target subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) * Because css offlining is asynchronous, userland may try to re-enable a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) * controller while the previous css is still around. This function grabs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) * cgroup_mutex and drains the previous css instances of @cgrp's subtree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) void cgroup_lock_and_drain_offline(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) __acquires(&cgroup_mutex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) struct cgroup *dsct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) struct cgroup_subsys_state *d_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) for_each_subsys(ss, ssid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) if (!css || !percpu_ref_is_dying(&css->refcnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) cgroup_get_live(dsct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) prepare_to_wait(&dsct->offline_waitq, &wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) finish_wait(&dsct->offline_waitq, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) cgroup_put(dsct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) * cgroup_save_control - save control masks and dom_cgrp of a subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) * @cgrp: root of the target subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) * Save ->subtree_control, ->subtree_ss_mask and ->dom_cgrp to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) * respective old_ prefixed fields for @cgrp's subtree including @cgrp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) * itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) static void cgroup_save_control(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) struct cgroup *dsct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) struct cgroup_subsys_state *d_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) dsct->old_subtree_control = dsct->subtree_control;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) dsct->old_subtree_ss_mask = dsct->subtree_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) dsct->old_dom_cgrp = dsct->dom_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) * cgroup_propagate_control - refresh control masks of a subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) * @cgrp: root of the target subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) * For @cgrp and its subtree, ensure ->subtree_ss_mask matches
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) * ->subtree_control and propagate controller availability through the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) * subtree so that descendants don't have unavailable controllers enabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) static void cgroup_propagate_control(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) struct cgroup *dsct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) struct cgroup_subsys_state *d_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) dsct->subtree_control &= cgroup_control(dsct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) dsct->subtree_ss_mask =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) cgroup_calc_subtree_ss_mask(dsct->subtree_control,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) cgroup_ss_mask(dsct));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) * cgroup_restore_control - restore control masks and dom_cgrp of a subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) * @cgrp: root of the target subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) * Restore ->subtree_control, ->subtree_ss_mask and ->dom_cgrp from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) * respective old_ prefixed fields for @cgrp's subtree including @cgrp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) * itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) static void cgroup_restore_control(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) struct cgroup *dsct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) struct cgroup_subsys_state *d_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) dsct->subtree_control = dsct->old_subtree_control;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) dsct->subtree_ss_mask = dsct->old_subtree_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) dsct->dom_cgrp = dsct->old_dom_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) static bool css_visible(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) struct cgroup_subsys *ss = css->ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) struct cgroup *cgrp = css->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) if (cgroup_control(cgrp) & (1 << ss->id))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) if (!(cgroup_ss_mask(cgrp) & (1 << ss->id)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) return cgroup_on_dfl(cgrp) && ss->implicit_on_dfl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) * cgroup_apply_control_enable - enable or show csses according to control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) * @cgrp: root of the target subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) * Walk @cgrp's subtree and create new csses or make the existing ones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) * visible. A css is created invisible if it's being implicitly enabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) * through dependency. An invisible css is made visible when the userland
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) * explicitly enables it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) * Returns 0 on success, -errno on failure. On failure, csses which have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) * been processed already aren't cleaned up. The caller is responsible for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) * cleaning up with cgroup_apply_control_disable().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) static int cgroup_apply_control_enable(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) struct cgroup *dsct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) struct cgroup_subsys_state *d_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) int ssid, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) for_each_subsys(ss, ssid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) if (!(cgroup_ss_mask(dsct) & (1 << ss->id)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) if (!css) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) css = css_create(dsct, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) if (IS_ERR(css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) return PTR_ERR(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) if (css_visible(css)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) ret = css_populate_dir(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) * cgroup_apply_control_disable - kill or hide csses according to control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) * @cgrp: root of the target subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) * Walk @cgrp's subtree and kill and hide csses so that they match
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) * cgroup_ss_mask() and cgroup_visible_mask().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) * A css is hidden when the userland requests it to be disabled while other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) * subsystems are still depending on it. The css must not actively control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) * resources and be in the vanilla state if it's made visible again later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) * Controllers which may be depended upon should provide ->css_reset() for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) * this purpose.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) static void cgroup_apply_control_disable(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) struct cgroup *dsct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) struct cgroup_subsys_state *d_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) cgroup_for_each_live_descendant_post(dsct, d_css, cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) for_each_subsys(ss, ssid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) struct cgroup_subsys_state *css = cgroup_css(dsct, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) if (!css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) WARN_ON_ONCE(percpu_ref_is_dying(&css->refcnt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) if (css->parent &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) !(cgroup_ss_mask(dsct) & (1 << ss->id))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) kill_css(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) } else if (!css_visible(css)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) css_clear_dir(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) if (ss->css_reset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) ss->css_reset(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) * cgroup_apply_control - apply control mask updates to the subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) * @cgrp: root of the target subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) * subsystems can be enabled and disabled in a subtree using the following
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) * steps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) * 1. Call cgroup_save_control() to stash the current state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) * 2. Update ->subtree_control masks in the subtree as desired.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) * 3. Call cgroup_apply_control() to apply the changes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) * 4. Optionally perform other related operations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) * 5. Call cgroup_finalize_control() to finish up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) * This function implements step 3 and propagates the mask changes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) * throughout @cgrp's subtree, updates csses accordingly and perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) * process migrations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) static int cgroup_apply_control(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) cgroup_propagate_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) ret = cgroup_apply_control_enable(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) * At this point, cgroup_e_css_by_mask() results reflect the new csses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) * making the following cgroup_update_dfl_csses() properly update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) * css associations of all tasks in the subtree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) ret = cgroup_update_dfl_csses(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) * cgroup_finalize_control - finalize control mask update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) * @cgrp: root of the target subtree
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) * @ret: the result of the update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) * Finalize control mask update. See cgroup_apply_control() for more info.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) static void cgroup_finalize_control(struct cgroup *cgrp, int ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) cgroup_restore_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) cgroup_propagate_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) cgroup_apply_control_disable(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) static int cgroup_vet_subtree_control_enable(struct cgroup *cgrp, u16 enable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) u16 domain_enable = enable & ~cgrp_dfl_threaded_ss_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) /* if nothing is getting enabled, nothing to worry about */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) if (!enable)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) /* can @cgrp host any resources? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) if (!cgroup_is_valid_domain(cgrp->dom_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) /* mixables don't care */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) if (cgroup_is_mixable(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) if (domain_enable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) /* can't enable domain controllers inside a thread subtree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) if (cgroup_is_thread_root(cgrp) || cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) * Threaded controllers can handle internal competitions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) * and are always allowed inside a (prospective) thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) * subtree.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) if (cgroup_can_be_thread_root(cgrp) || cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) * Controllers can't be enabled for a cgroup with tasks to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) * child cgroups competing against tasks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) if (cgroup_has_tasks(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) /* change the enabled child controllers for a cgroup in the default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) static ssize_t cgroup_subtree_control_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) char *buf, size_t nbytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) u16 enable = 0, disable = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) struct cgroup *cgrp, *child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) char *tok;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) int ssid, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) * Parse input - space separated list of subsystem names prefixed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) * with either + or -.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) buf = strstrip(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) while ((tok = strsep(&buf, " "))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) if (tok[0] == '\0')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) do_each_subsys_mask(ss, ssid, ~cgrp_dfl_inhibit_ss_mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) if (!cgroup_ssid_enabled(ssid) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) strcmp(tok + 1, ss->name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) if (*tok == '+') {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) enable |= 1 << ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) disable &= ~(1 << ssid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) } else if (*tok == '-') {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) disable |= 1 << ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) enable &= ~(1 << ssid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) if (ssid == CGROUP_SUBSYS_COUNT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) cgrp = cgroup_kn_lock_live(of->kn, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) for_each_subsys(ss, ssid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) if (enable & (1 << ssid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) if (cgrp->subtree_control & (1 << ssid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) enable &= ~(1 << ssid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) if (!(cgroup_control(cgrp) & (1 << ssid))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) } else if (disable & (1 << ssid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) if (!(cgrp->subtree_control & (1 << ssid))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) disable &= ~(1 << ssid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) /* a child has it enabled? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) cgroup_for_each_live_child(child, cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) if (child->subtree_control & (1 << ssid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) if (!enable && !disable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) ret = cgroup_vet_subtree_control_enable(cgrp, enable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) /* save and update control masks and prepare csses */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) cgroup_save_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) cgrp->subtree_control |= enable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) cgrp->subtree_control &= ~disable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) ret = cgroup_apply_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) cgroup_finalize_control(cgrp, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) kernfs_activate(cgrp->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) * cgroup_enable_threaded - make @cgrp threaded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) * @cgrp: the target cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) * Called when "threaded" is written to the cgroup.type interface file and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) * tries to make @cgrp threaded and join the parent's resource domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) * This function is never called on the root cgroup as cgroup.type doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) * exist on it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) static int cgroup_enable_threaded(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) struct cgroup *parent = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) struct cgroup *dom_cgrp = parent->dom_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) struct cgroup *dsct;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) struct cgroup_subsys_state *d_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) /* noop if already threaded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) * If @cgroup is populated or has domain controllers enabled, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) * can't be switched. While the below cgroup_can_be_thread_root()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) * test can catch the same conditions, that's only when @parent is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) * not mixable, so let's check it explicitly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) if (cgroup_is_populated(cgrp) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) cgrp->subtree_control & ~cgrp_dfl_threaded_ss_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) /* we're joining the parent's domain, ensure its validity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) if (!cgroup_is_valid_domain(dom_cgrp) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) !cgroup_can_be_thread_root(dom_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) * The following shouldn't cause actual migrations and should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) * always succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) cgroup_save_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) if (dsct == cgrp || cgroup_is_threaded(dsct))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) dsct->dom_cgrp = dom_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) ret = cgroup_apply_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) parent->nr_threaded_children++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) cgroup_finalize_control(cgrp, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) static int cgroup_type_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) seq_puts(seq, "threaded\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) else if (!cgroup_is_valid_domain(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) seq_puts(seq, "domain invalid\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) else if (cgroup_is_thread_root(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) seq_puts(seq, "domain threaded\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) seq_puts(seq, "domain\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) static ssize_t cgroup_type_write(struct kernfs_open_file *of, char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) /* only switching to threaded mode is supported */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) if (strcmp(strstrip(buf), "threaded"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) /* drain dying csses before we re-apply (threaded) subtree control */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) cgrp = cgroup_kn_lock_live(of->kn, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) /* threaded can only be enabled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) ret = cgroup_enable_threaded(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) static int cgroup_max_descendants_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) int descendants = READ_ONCE(cgrp->max_descendants);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) if (descendants == INT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) seq_puts(seq, "max\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) seq_printf(seq, "%d\n", descendants);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) static ssize_t cgroup_max_descendants_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) int descendants;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) buf = strstrip(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) if (!strcmp(buf, "max")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) descendants = INT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) ret = kstrtoint(buf, 0, &descendants);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) if (descendants < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) cgrp = cgroup_kn_lock_live(of->kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) cgrp->max_descendants = descendants;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) return nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) static int cgroup_max_depth_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) int depth = READ_ONCE(cgrp->max_depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) if (depth == INT_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) seq_puts(seq, "max\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) seq_printf(seq, "%d\n", depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) static ssize_t cgroup_max_depth_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) int depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) buf = strstrip(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) if (!strcmp(buf, "max")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) depth = INT_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) ret = kstrtoint(buf, 0, &depth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) if (depth < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) cgrp = cgroup_kn_lock_live(of->kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) cgrp->max_depth = depth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) return nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) static int cgroup_events_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) seq_printf(seq, "populated %d\n", cgroup_is_populated(cgrp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) seq_printf(seq, "frozen %d\n", test_bit(CGRP_FROZEN, &cgrp->flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) static int cgroup_stat_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) struct cgroup *cgroup = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) seq_printf(seq, "nr_descendants %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) cgroup->nr_descendants);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) seq_printf(seq, "nr_dying_descendants %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) cgroup->nr_dying_descendants);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) static int __maybe_unused cgroup_extra_stat_show(struct seq_file *seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) struct cgroup *cgrp, int ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) struct cgroup_subsys *ss = cgroup_subsys[ssid];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) if (!ss->css_extra_stat_show)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) css = cgroup_tryget_css(cgrp, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) if (!css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) ret = ss->css_extra_stat_show(seq, css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) css_put(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581)
/*
 * Show "cpu.stat": the base cputime statistics plus, when the cpu
 * controller is built in (CONFIG_CGROUP_SCHED), its extra stats.
 */
static int cpu_stat_show(struct seq_file *seq, void *v)
{
	/* only referenced inside the #ifdef below, hence __maybe_unused */
	struct cgroup __maybe_unused *cgrp = seq_css(seq)->cgroup;
	int ret = 0;

	cgroup_base_stat_cputime_show(seq);
#ifdef CONFIG_CGROUP_SCHED
	ret = cgroup_extra_stat_show(seq, cgrp, cpu_cgrp_id);
#endif
	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) #ifdef CONFIG_PSI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) return psi_show(seq, psi, PSI_IO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) return psi_show(seq, psi, PSI_MEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) return psi_show(seq, psi, PSI_CPU);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616)
/*
 * Common write handler for the "{io,memory,cpu}.pressure" files.  Parses
 * @buf as a PSI trigger specification for resource @res and attaches the
 * resulting trigger to this file descriptor's open context.
 */
static ssize_t cgroup_pressure_write(struct kernfs_open_file *of, char *buf,
				     size_t nbytes, enum psi_res res)
{
	struct cgroup_file_ctx *ctx = of->priv;
	struct psi_trigger *new;
	struct cgroup *cgrp;
	struct psi_group *psi;

	cgrp = cgroup_kn_lock_live(of->kn, false);
	if (!cgrp)
		return -ENODEV;

	/*
	 * Pin the cgroup across trigger creation below; the kernfs
	 * active protection itself is dropped right away.
	 */
	cgroup_get(cgrp);
	cgroup_kn_unlock(of->kn);

	/* Allow only one trigger per file descriptor */
	if (ctx->psi.trigger) {
		cgroup_put(cgrp);
		return -EBUSY;
	}

	/* the root cgroup (inode #1) uses the system-wide PSI group */
	psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
	new = psi_trigger_create(psi, buf, nbytes, res);
	if (IS_ERR(new)) {
		cgroup_put(cgrp);
		return PTR_ERR(new);
	}

	/* publish the trigger; release ordering pairs with readers of it */
	smp_store_release(&ctx->psi.trigger, new);
	cgroup_put(cgrp);

	return nbytes;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650)
/* "io.pressure" write handler: install a PSI_IO trigger for this fd. */
static ssize_t cgroup_io_pressure_write(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	return cgroup_pressure_write(of, buf, nbytes, PSI_IO);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657)
/* "memory.pressure" write handler: install a PSI_MEM trigger for this fd. */
static ssize_t cgroup_memory_pressure_write(struct kernfs_open_file *of,
					char *buf, size_t nbytes,
					loff_t off)
{
	return cgroup_pressure_write(of, buf, nbytes, PSI_MEM);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664)
/* "cpu.pressure" write handler: install a PSI_CPU trigger for this fd. */
static ssize_t cgroup_cpu_pressure_write(struct kernfs_open_file *of,
					 char *buf, size_t nbytes,
					 loff_t off)
{
	return cgroup_pressure_write(of, buf, nbytes, PSI_CPU);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671)
/* poll handler for pressure files - waits on this fd's PSI trigger */
static __poll_t cgroup_pressure_poll(struct kernfs_open_file *of,
				     poll_table *pt)
{
	struct cgroup_file_ctx *ctx = of->priv;
	return psi_trigger_poll(&ctx->psi.trigger, of->file, pt);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678)
/* release handler for pressure files - tears down this fd's trigger */
static void cgroup_pressure_release(struct kernfs_open_file *of)
{
	struct cgroup_file_ctx *ctx = of->priv;

	/*
	 * trigger is NULL when nothing was ever written to this fd -
	 * presumably handled inside psi_trigger_destroy(); confirm there.
	 */
	psi_trigger_destroy(ctx->psi.trigger);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) bool cgroup_psi_enabled(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) return (cgroup_feature_disable_mask & (1 << OPT_FEATURE_PRESSURE)) == 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) #else /* CONFIG_PSI */
/* !CONFIG_PSI: pressure accounting is compiled out entirely. */
bool cgroup_psi_enabled(void)
{
	return false;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) #endif /* CONFIG_PSI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) static int cgroup_freeze_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) seq_printf(seq, "%d\n", cgrp->freezer.freeze);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) int freeze;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) ret = kstrtoint(strstrip(buf), 0, &freeze);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) if (freeze < 0 || freeze > 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) cgrp = cgroup_kn_lock_live(of->kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) cgroup_freeze(cgrp, freeze);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) return nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) static int cgroup_file_open(struct kernfs_open_file *of)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) struct cftype *cft = of->kn->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) struct cgroup_file_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) if (!ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) ctx->ns = current->nsproxy->cgroup_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) get_cgroup_ns(ctx->ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) of->priv = ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) if (!cft->open)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) ret = cft->open(of);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) put_cgroup_ns(ctx->ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) static void cgroup_file_release(struct kernfs_open_file *of)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) struct cftype *cft = of->kn->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) struct cgroup_file_ctx *ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) if (cft->release)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) cft->release(of);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) put_cgroup_ns(ctx->ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) kfree(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768)
/*
 * kernfs write handler shared by all cgroup files.  Dispatches to the
 * cftype's ->write() when present; otherwise parses @buf as an integer
 * for the ->write_u64()/->write_s64() convenience callbacks.
 */
static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
				 size_t nbytes, loff_t off)
{
	struct cgroup_file_ctx *ctx = of->priv;
	struct cgroup *cgrp = of->kn->parent->priv;
	struct cftype *cft = of->kn->priv;
	struct cgroup_subsys_state *css;
	int ret;

	if (!nbytes)
		return 0;

	/*
	 * If namespaces are delegation boundaries, disallow writes to
	 * files in an non-init namespace root from inside the namespace
	 * except for the files explicitly marked delegatable -
	 * cgroup.procs and cgroup.subtree_control.
	 */
	if ((cgrp->root->flags & CGRP_ROOT_NS_DELEGATE) &&
	    !(cft->flags & CFTYPE_NS_DELEGATABLE) &&
	    ctx->ns != &init_cgroup_ns && ctx->ns->root_cset->dfl_cgrp == cgrp)
		return -EPERM;

	if (cft->write)
		return cft->write(of, buf, nbytes, off);

	/*
	 * kernfs guarantees that a file isn't deleted with operations in
	 * flight, which means that the matching css is and stays alive and
	 * doesn't need to be pinned. The RCU locking is not necessary
	 * either. It's just for the convenience of using cgroup_css().
	 */
	rcu_read_lock();
	css = cgroup_css(cgrp, cft->ss);
	rcu_read_unlock();

	if (cft->write_u64) {
		unsigned long long v;
		ret = kstrtoull(buf, 0, &v);
		if (!ret)
			ret = cft->write_u64(css, cft, v);
	} else if (cft->write_s64) {
		long long v;
		ret = kstrtoll(buf, 0, &v);
		if (!ret)
			ret = cft->write_s64(css, cft, v);
	} else {
		ret = -EINVAL;
	}

	/* a successful parse-and-apply reports the full write size */
	return ret ?: nbytes;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) struct cftype *cft = of->kn->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) if (cft->poll)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) return cft->poll(of, pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) return kernfs_generic_poll(of, pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831)
/* seq_file start callback - forwarded to the cftype's ->seq_start() */
static void *cgroup_seqfile_start(struct seq_file *seq, loff_t *ppos)
{
	return seq_cft(seq)->seq_start(seq, ppos);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836)
/* seq_file next callback - forwarded to the cftype's ->seq_next() */
static void *cgroup_seqfile_next(struct seq_file *seq, void *v, loff_t *ppos)
{
	return seq_cft(seq)->seq_next(seq, v, ppos);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) static void cgroup_seqfile_stop(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) if (seq_cft(seq)->seq_stop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) seq_cft(seq)->seq_stop(seq, v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) static int cgroup_seqfile_show(struct seq_file *m, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) struct cftype *cft = seq_cft(m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) struct cgroup_subsys_state *css = seq_css(m);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) if (cft->seq_show)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) return cft->seq_show(m, arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) if (cft->read_u64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) seq_printf(m, "%llu\n", cft->read_u64(css, cft));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) else if (cft->read_s64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) seq_printf(m, "%lld\n", cft->read_s64(css, cft));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864)
/*
 * kernfs ops for cftypes without custom iteration - the whole file body
 * comes from a single ->seq_show() invocation.
 */
static struct kernfs_ops cgroup_kf_single_ops = {
	.atomic_write_len = PAGE_SIZE,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
	.write = cgroup_file_write,
	.poll = cgroup_file_poll,
	.seq_show = cgroup_seqfile_show,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873)
/*
 * kernfs ops for cftypes that provide their own seq_start/next/stop
 * iterators in addition to ->seq_show().
 */
static struct kernfs_ops cgroup_kf_ops = {
	.atomic_write_len = PAGE_SIZE,
	.open = cgroup_file_open,
	.release = cgroup_file_release,
	.write = cgroup_file_write,
	.poll = cgroup_file_poll,
	.seq_start = cgroup_seqfile_start,
	.seq_next = cgroup_seqfile_next,
	.seq_stop = cgroup_seqfile_stop,
	.seq_show = cgroup_seqfile_show,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) /* set uid and gid of cgroup dirs and files to that of the creator */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) static int cgroup_kn_set_ugid(struct kernfs_node *kn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) .ia_uid = current_fsuid(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) .ia_gid = current_fsgid(), };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) return kernfs_setattr(kn, &iattr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899)
/*
 * Timer callback delivering a deferred cgroup_file_notify() - presumably
 * used to batch/throttle notifications; see cgroup_file_notify() to confirm.
 */
static void cgroup_file_notify_timer(struct timer_list *timer)
{
	cgroup_file_notify(container_of(timer, struct cgroup_file,
					notify_timer));
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905)
/*
 * Create the kernfs file for @cft under @cgrp.  When the cftype carries a
 * file_offset, also link the new node into the cgroup_file embedded in
 * @css so cgroup_file_notify() can locate it later.
 */
static int cgroup_add_file(struct cgroup_subsys_state *css, struct cgroup *cgrp,
			   struct cftype *cft)
{
	char name[CGROUP_FILE_NAME_MAX];
	struct kernfs_node *kn;
	struct lock_class_key *key = NULL;
	int ret;

#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/* a per-cftype lockdep class keeps lock reports distinguishable */
	key = &cft->lockdep_key;
#endif
	kn = __kernfs_create_file(cgrp->kn, cgroup_file_name(cgrp, cft, name),
				  cgroup_file_mode(cft),
				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
				  0, cft->kf_ops, cft,
				  NULL, key);
	if (IS_ERR(kn))
		return PTR_ERR(kn);

	/* new files take on the creator's uid/gid */
	ret = cgroup_kn_set_ugid(kn);
	if (ret) {
		kernfs_remove(kn);
		return ret;
	}

	if (cft->file_offset) {
		/* the cgroup_file lives inside the css at file_offset */
		struct cgroup_file *cfile = (void *)css + cft->file_offset;

		timer_setup(&cfile->notify_timer, cgroup_file_notify_timer, 0);

		spin_lock_irq(&cgroup_file_kn_lock);
		cfile->kn = kn;
		spin_unlock_irq(&cgroup_file_kn_lock);
	}

	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943)
/**
 * cgroup_addrm_files - add or remove files to a cgroup directory
 * @css: the target css
 * @cgrp: the target cgroup (usually css->cgroup)
 * @cfts: array of cftypes to be added
 * @is_add: whether to add or remove
 *
 * Depending on @is_add, add or remove files defined by @cfts on @cgrp.
 * For removals, this function never fails.  If adding any file fails,
 * the files added so far are removed again and the first error is
 * returned.
 */
static int cgroup_addrm_files(struct cgroup_subsys_state *css,
			      struct cgroup *cgrp, struct cftype cfts[],
			      bool is_add)
{
	struct cftype *cft, *cft_end = NULL;
	int ret = 0;

	lockdep_assert_held(&cgroup_mutex);

	/* on rollback, cft_end marks where addition stopped */
restart:
	for (cft = cfts; cft != cft_end && cft->name[0] != '\0'; cft++) {
		/* does cft->flags tell us to skip this file on @cgrp? */
		if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
			continue;
		if ((cft->flags & __CFTYPE_ONLY_ON_DFL) && !cgroup_on_dfl(cgrp))
			continue;
		if ((cft->flags & __CFTYPE_NOT_ON_DFL) && cgroup_on_dfl(cgrp))
			continue;
		if ((cft->flags & CFTYPE_NOT_ON_ROOT) && !cgroup_parent(cgrp))
			continue;
		if ((cft->flags & CFTYPE_ONLY_ON_ROOT) && cgroup_parent(cgrp))
			continue;
		if ((cft->flags & CFTYPE_DEBUG) && !cgroup_debug)
			continue;
		if (is_add) {
			ret = cgroup_add_file(css, cgrp, cft);
			if (ret) {
				pr_warn("%s: failed to add %s, err=%d\n",
					__func__, cft->name, ret);
				/* switch to removal and undo what was added */
				cft_end = cft;
				is_add = false;
				goto restart;
			}
		} else {
			cgroup_rm_file(cgrp, cft);
		}
	}
	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) static int cgroup_apply_cftypes(struct cftype *cfts, bool is_add)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) struct cgroup_subsys *ss = cfts[0].ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) struct cgroup *root = &ss->root->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) /* add/rm files for all cgroups created before */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) css_for_each_descendant_pre(css, cgroup_css(root, ss)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) struct cgroup *cgrp = css->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) if (!(css->flags & CSS_VISIBLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) ret = cgroup_addrm_files(css, cgrp, cfts, is_add);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) if (is_add && !ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) kernfs_activate(root->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) static void cgroup_exit_cftypes(struct cftype *cfts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) struct cftype *cft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) for (cft = cfts; cft->name[0] != '\0'; cft++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) /* free copy for custom atomic_write_len, see init_cftypes() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) if (cft->max_write_len && cft->max_write_len != PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) kfree(cft->kf_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) cft->kf_ops = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) cft->ss = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) /* revert flags set by cgroup core while adding @cfts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) cft->flags &= ~(__CFTYPE_ONLY_ON_DFL | __CFTYPE_NOT_ON_DFL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) static int cgroup_init_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) struct cftype *cft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) for (cft = cfts; cft->name[0] != '\0'; cft++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) struct kernfs_ops *kf_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) WARN_ON(cft->ss || cft->kf_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) if (cft->seq_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) kf_ops = &cgroup_kf_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) kf_ops = &cgroup_kf_single_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) * Ugh... if @cft wants a custom max_write_len, we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) * make a copy of kf_ops to set its atomic_write_len.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) if (cft->max_write_len && cft->max_write_len != PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) kf_ops = kmemdup(kf_ops, sizeof(*kf_ops), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) if (!kf_ops) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) cgroup_exit_cftypes(cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) kf_ops->atomic_write_len = cft->max_write_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) cft->kf_ops = kf_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) cft->ss = ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) static int cgroup_rm_cftypes_locked(struct cftype *cfts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) if (!cfts || !cfts[0].ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) list_del(&cfts->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) cgroup_apply_cftypes(cfts, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) cgroup_exit_cftypes(cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) * cgroup_rm_cftypes - remove an array of cftypes from a subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) * @cfts: zero-length name terminated array of cftypes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) * Unregister @cfts. Files described by @cfts are removed from all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) * existing cgroups and all future cgroups won't have them either. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) * function can be called anytime whether @cfts' subsys is attached or not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) * Returns 0 on successful unregistration, -ENOENT if @cfts is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) * registered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) int cgroup_rm_cftypes(struct cftype *cfts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) ret = cgroup_rm_cftypes_locked(cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) * cgroup_add_cftypes - add an array of cftypes to a subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) * @ss: target cgroup subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) * @cfts: zero-length name terminated array of cftypes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) * Register @cfts to @ss. Files described by @cfts are created for all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) * existing cgroups to which @ss is attached and all future cgroups will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) * have them too. This function can be called anytime whether @ss is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) * attached or not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) * Returns 0 on successful registration, -errno on failure. Note that this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) * function currently returns 0 as long as @cfts registration is successful
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) * even if some file creation attempts on existing cgroups fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) static int cgroup_add_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) if (!cgroup_ssid_enabled(ss->id))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) if (!cfts || cfts[0].name[0] == '\0')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) ret = cgroup_init_cftypes(ss, cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) list_add_tail(&cfts->node, &ss->cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) ret = cgroup_apply_cftypes(cfts, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) cgroup_rm_cftypes_locked(cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) * cgroup_add_dfl_cftypes - add an array of cftypes for default hierarchy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) * @ss: target cgroup subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) * @cfts: zero-length name terminated array of cftypes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) * Similar to cgroup_add_cftypes() but the added files are only used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) * the default hierarchy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) int cgroup_add_dfl_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) struct cftype *cft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) cft->flags |= __CFTYPE_ONLY_ON_DFL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) return cgroup_add_cftypes(ss, cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) * cgroup_add_legacy_cftypes - add an array of cftypes for legacy hierarchies
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) * @ss: target cgroup subsystem
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) * @cfts: zero-length name terminated array of cftypes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) * Similar to cgroup_add_cftypes() but the added files are only used for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) * the legacy hierarchies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) int cgroup_add_legacy_cftypes(struct cgroup_subsys *ss, struct cftype *cfts)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) struct cftype *cft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) for (cft = cfts; cft && cft->name[0] != '\0'; cft++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) cft->flags |= __CFTYPE_NOT_ON_DFL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) return cgroup_add_cftypes(ss, cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) EXPORT_SYMBOL_GPL(cgroup_add_legacy_cftypes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) * cgroup_file_notify - generate a file modified event for a cgroup_file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) * @cfile: target cgroup_file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) * @cfile must have been obtained by setting cftype->file_offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) void cgroup_file_notify(struct cgroup_file *cfile)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) spin_lock_irqsave(&cgroup_file_kn_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) if (cfile->kn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) unsigned long last = cfile->notified_at;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) unsigned long next = last + CGROUP_FILE_NOTIFY_MIN_INTV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) if (time_in_range(jiffies, last, next)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) timer_reduce(&cfile->notify_timer, next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) kernfs_notify(cfile->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) cfile->notified_at = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) spin_unlock_irqrestore(&cgroup_file_kn_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) * css_next_child - find the next child of a given css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) * @pos: the current position (%NULL to initiate traversal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) * @parent: css whose children to walk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) * This function returns the next child of @parent and should be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) * under either cgroup_mutex or RCU read lock. The only requirement is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) * that @parent and @pos are accessible. The next sibling is guaranteed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) * be returned regardless of their states.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) * If a subsystem synchronizes ->css_online() and the start of iteration, a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) * css which finished ->css_online() is guaranteed to be visible in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) * future iterations and will stay visible until the last reference is put.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) * A css which hasn't finished ->css_online() or already finished
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) * ->css_offline() may show up during traversal. It's each subsystem's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) * responsibility to synchronize against on/offlining.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) struct cgroup_subsys_state *parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) struct cgroup_subsys_state *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) cgroup_assert_mutex_or_rcu_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) * @pos could already have been unlinked from the sibling list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) * Once a cgroup is removed, its ->sibling.next is no longer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) * updated when its next sibling changes. CSS_RELEASED is set when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) * @pos is taken off list, at which time its next pointer is valid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) * and, as releases are serialized, the one pointed to by the next
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) * pointer is guaranteed to not have started release yet. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) * implies that if we observe !CSS_RELEASED on @pos in this RCU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) * critical section, the one pointed to by its next pointer is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) * guaranteed to not have finished its RCU grace period even if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) * have dropped rcu_read_lock() inbetween iterations.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) * If @pos has CSS_RELEASED set, its next pointer can't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) * dereferenced; however, as each css is given a monotonically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) * increasing unique serial number and always appended to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) * sibling list, the next one can be found by walking the parent's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) * children until the first css with higher serial number than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) * @pos's. While this path can be slower, it happens iff iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) * races against release and the race window is very small.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) if (!pos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) next = list_entry_rcu(parent->children.next, struct cgroup_subsys_state, sibling);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) } else if (likely(!(pos->flags & CSS_RELEASED))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) next = list_entry_rcu(pos->sibling.next, struct cgroup_subsys_state, sibling);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) list_for_each_entry_rcu(next, &parent->children, sibling,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) lockdep_is_held(&cgroup_mutex))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) if (next->serial_nr > pos->serial_nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) * @next, if not pointing to the head, can be dereferenced and is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) * the next sibling.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) if (&next->sibling != &parent->children)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) return next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) EXPORT_SYMBOL_GPL(css_next_child);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) * css_next_descendant_pre - find the next descendant for pre-order walk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) * @pos: the current position (%NULL to initiate traversal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) * @root: css whose descendants to walk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) * To be used by css_for_each_descendant_pre(). Find the next descendant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) * to visit for pre-order traversal of @root's descendants. @root is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) * included in the iteration and the first node to be visited.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) * While this function requires cgroup_mutex or RCU read locking, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) * doesn't require the whole traversal to be contained in a single critical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) * section. This function will return the correct next descendant as long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) * as both @pos and @root are accessible and @pos is a descendant of @root.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) * If a subsystem synchronizes ->css_online() and the start of iteration, a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) * css which finished ->css_online() is guaranteed to be visible in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) * future iterations and will stay visible until the last reference is put.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) * A css which hasn't finished ->css_online() or already finished
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) * ->css_offline() may show up during traversal. It's each subsystem's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) * responsibility to synchronize against on/offlining.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) struct cgroup_subsys_state *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) css_next_descendant_pre(struct cgroup_subsys_state *pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) struct cgroup_subsys_state *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) struct cgroup_subsys_state *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) cgroup_assert_mutex_or_rcu_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) /* if first iteration, visit @root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) if (!pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) return root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) /* visit the first child if exists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) next = css_next_child(NULL, pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) if (next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) return next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) /* no child, visit my or the closest ancestor's next sibling */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) while (pos != root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) next = css_next_child(pos, pos->parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) if (next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) return next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) pos = pos->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) EXPORT_SYMBOL_GPL(css_next_descendant_pre);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) * css_rightmost_descendant - return the rightmost descendant of a css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) * @pos: css of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) * Return the rightmost descendant of @pos. If there's no descendant, @pos
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) * is returned. This can be used during pre-order traversal to skip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) * subtree of @pos.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) * While this function requires cgroup_mutex or RCU read locking, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) * doesn't require the whole traversal to be contained in a single critical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) * section. This function will return the correct rightmost descendant as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) * long as @pos is accessible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) struct cgroup_subsys_state *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) css_rightmost_descendant(struct cgroup_subsys_state *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) struct cgroup_subsys_state *last, *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) cgroup_assert_mutex_or_rcu_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) last = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) /* ->prev isn't RCU safe, walk ->next till the end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) pos = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) css_for_each_child(tmp, last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) pos = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) } while (pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) return last;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) static struct cgroup_subsys_state *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) css_leftmost_descendant(struct cgroup_subsys_state *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) struct cgroup_subsys_state *last;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) last = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) pos = css_next_child(NULL, pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) } while (pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) return last;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) * css_next_descendant_post - find the next descendant for post-order walk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) * @pos: the current position (%NULL to initiate traversal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) * @root: css whose descendants to walk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) * To be used by css_for_each_descendant_post(). Find the next descendant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) * to visit for post-order traversal of @root's descendants. @root is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) * included in the iteration and the last node to be visited.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) * While this function requires cgroup_mutex or RCU read locking, it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) * doesn't require the whole traversal to be contained in a single critical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) * section. This function will return the correct next descendant as long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) * as both @pos and @cgroup are accessible and @pos is a descendant of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) * @cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) * If a subsystem synchronizes ->css_online() and the start of iteration, a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) * css which finished ->css_online() is guaranteed to be visible in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) * future iterations and will stay visible until the last reference is put.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) * A css which hasn't finished ->css_online() or already finished
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) * ->css_offline() may show up during traversal. It's each subsystem's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) * responsibility to synchronize against on/offlining.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) struct cgroup_subsys_state *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) css_next_descendant_post(struct cgroup_subsys_state *pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) struct cgroup_subsys_state *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) struct cgroup_subsys_state *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) cgroup_assert_mutex_or_rcu_locked();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) /* if first iteration, visit leftmost descendant which may be @root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) if (!pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) return css_leftmost_descendant(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) /* if we visited @root, we're done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) if (pos == root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) /* if there's an unvisited sibling, visit its leftmost descendant */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) next = css_next_child(pos, pos->parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) if (next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) return css_leftmost_descendant(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) /* no sibling left, visit parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) return pos->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) * css_has_online_children - does a css have online children
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) * @css: the target css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) * Returns %true if @css has any online children; otherwise, %false. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) * function can be called from any context but the caller is responsible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) * for synchronizing against on/offlining as necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) bool css_has_online_children(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) struct cgroup_subsys_state *child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) css_for_each_child(child, css) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) if (child->flags & CSS_ONLINE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) struct list_head *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) /* find the next threaded cset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) if (it->tcset_pos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) l = it->tcset_pos->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) if (l != it->tcset_head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) it->tcset_pos = l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) return container_of(l, struct css_set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) threaded_csets_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) it->tcset_pos = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) /* find the next cset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) l = it->cset_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) l = l->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) if (l == it->cset_head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) it->cset_pos = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) if (it->ss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) cset = container_of(l, struct css_set, e_cset_node[it->ss->id]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) link = list_entry(l, struct cgrp_cset_link, cset_link);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) cset = link->cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) it->cset_pos = l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) /* initialize threaded css_set walking */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) if (it->flags & CSS_TASK_ITER_THREADED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) if (it->cur_dcset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) put_css_set_locked(it->cur_dcset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) it->cur_dcset = cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) get_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) it->tcset_head = &cset->threaded_csets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) it->tcset_pos = &cset->threaded_csets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) return cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) * css_task_iter_advance_css_set - advance a task itererator to the next css_set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) * @it: the iterator to advance
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) * Advance @it to the next css_set to walk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) static void css_task_iter_advance_css_set(struct css_task_iter *it)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) /* Advance to the next non-empty css_set and find first non-empty tasks list*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) while ((cset = css_task_iter_next_css_set(it))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) if (!list_empty(&cset->tasks)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) it->cur_tasks_head = &cset->tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) } else if (!list_empty(&cset->mg_tasks)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) it->cur_tasks_head = &cset->mg_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) } else if (!list_empty(&cset->dying_tasks)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) it->cur_tasks_head = &cset->dying_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) if (!cset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) it->task_pos = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) it->task_pos = it->cur_tasks_head->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) * We don't keep css_sets locked across iteration steps and thus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) * need to take steps to ensure that iteration can be resumed after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) * the lock is re-acquired. Iteration is performed at two levels -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) * css_sets and tasks in them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) * Once created, a css_set never leaves its cgroup lists, so a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) * pinned css_set is guaranteed to stay put and we can resume
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) * iteration afterwards.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) * Tasks may leave @cset across iteration steps. This is resolved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) * by registering each iterator with the css_set currently being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) * walked and making css_set_move_task() advance iterators whose
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) * next task is leaving.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) if (it->cur_cset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) list_del(&it->iters_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) put_css_set_locked(it->cur_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) get_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) it->cur_cset = cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) list_add(&it->iters_node, &cset->task_iters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) static void css_task_iter_skip(struct css_task_iter *it,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) if (it->task_pos == &task->cg_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) it->task_pos = it->task_pos->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) it->flags |= CSS_TASK_ITER_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) static void css_task_iter_advance(struct css_task_iter *it)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) lockdep_assert_held(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) repeat:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560) if (it->task_pos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) * Advance iterator to find next entry. We go through cset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) * tasks, mg_tasks and dying_tasks, when consumed we move onto
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) * the next cset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) if (it->flags & CSS_TASK_ITER_SKIPPED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) it->flags &= ~CSS_TASK_ITER_SKIPPED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) it->task_pos = it->task_pos->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) if (it->task_pos == &it->cur_cset->tasks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) it->cur_tasks_head = &it->cur_cset->mg_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) it->task_pos = it->cur_tasks_head->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) if (it->task_pos == &it->cur_cset->mg_tasks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) it->cur_tasks_head = &it->cur_cset->dying_tasks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) it->task_pos = it->cur_tasks_head->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) if (it->task_pos == &it->cur_cset->dying_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) css_task_iter_advance_css_set(it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) /* called from start, proceed to the first cset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) css_task_iter_advance_css_set(it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) if (!it->task_pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) task = list_entry(it->task_pos, struct task_struct, cg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) if (it->flags & CSS_TASK_ITER_PROCS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) /* if PROCS, skip over tasks which aren't group leaders */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) if (!thread_group_leader(task))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) /* and dying leaders w/o live member threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) if (it->cur_tasks_head == &it->cur_cset->dying_tasks &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) !atomic_read(&task->signal->live))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) /* skip all dying ones */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) if (it->cur_tasks_head == &it->cur_cset->dying_tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) goto repeat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) * css_task_iter_start - initiate task iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) * @css: the css to walk tasks of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) * @flags: CSS_TASK_ITER_* flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) * @it: the task iterator to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) * Initiate iteration through the tasks of @css. The caller can call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) * css_task_iter_next() to walk through the tasks until the function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) * returns NULL. On completion of iteration, css_task_iter_end() must be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) * called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) void css_task_iter_start(struct cgroup_subsys_state *css, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) struct css_task_iter *it)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) memset(it, 0, sizeof(*it));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) it->ss = css->ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) it->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) if (it->ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) it->cset_pos = &css->cgroup->e_csets[css->ss->id];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) it->cset_pos = &css->cgroup->cset_links;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) it->cset_head = it->cset_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) css_task_iter_advance(it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) * css_task_iter_next - return the next task for the iterator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) * @it: the task iterator being iterated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) * The "next" function for task iteration. @it should have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) * initialized via css_task_iter_start(). Returns NULL when the iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) * reaches the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) struct task_struct *css_task_iter_next(struct css_task_iter *it)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) if (it->cur_task) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) put_task_struct(it->cur_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) it->cur_task = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) /* @it may be half-advanced by skips, finish advancing */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) if (it->flags & CSS_TASK_ITER_SKIPPED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) css_task_iter_advance(it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) if (it->task_pos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) it->cur_task = list_entry(it->task_pos, struct task_struct,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) cg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) get_task_struct(it->cur_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) css_task_iter_advance(it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) return it->cur_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) * css_task_iter_end - finish task iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) * @it: the task iterator to finish
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) * Finish task iteration started by css_task_iter_start().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) void css_task_iter_end(struct css_task_iter *it)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) if (it->cur_cset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) list_del(&it->iters_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) put_css_set_locked(it->cur_cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) if (it->cur_dcset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) put_css_set(it->cur_dcset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) if (it->cur_task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) put_task_struct(it->cur_task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) static void cgroup_procs_release(struct kernfs_open_file *of)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) struct cgroup_file_ctx *ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) if (ctx->procs.started)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) css_task_iter_end(&ctx->procs.iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) struct kernfs_open_file *of = s->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) struct cgroup_file_ctx *ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) if (pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) (*pos)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) return css_task_iter_next(&ctx->procs.iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) unsigned int iter_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) struct kernfs_open_file *of = s->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) struct cgroup *cgrp = seq_css(s)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) struct cgroup_file_ctx *ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) struct css_task_iter *it = &ctx->procs.iter;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) * When a seq_file is seeked, it's always traversed sequentially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) * from position 0, so we can simply keep iterating on !0 *pos.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) if (!ctx->procs.started) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) if (WARN_ON_ONCE((*pos)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) css_task_iter_start(&cgrp->self, iter_flags, it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) ctx->procs.started = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) } else if (!(*pos)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) css_task_iter_end(it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) css_task_iter_start(&cgrp->self, iter_flags, it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) return it->cur_task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) return cgroup_procs_next(s, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) static void *cgroup_procs_start(struct seq_file *s, loff_t *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) struct cgroup *cgrp = seq_css(s)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) * All processes of a threaded subtree belong to the domain cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) * of the subtree. Only threads can be distributed across the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) * subtree. Reject reads on cgroup.procs in the subtree proper.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) * They're always empty anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) if (cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) return ERR_PTR(-EOPNOTSUPP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) return __cgroup_procs_start(s, pos, CSS_TASK_ITER_PROCS |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) CSS_TASK_ITER_THREADED);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) static int cgroup_procs_show(struct seq_file *s, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) seq_printf(s, "%d\n", task_pid_vnr(v));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) inode = kernfs_get_inode(sb, cgrp->procs_file.kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) if (!inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) ret = inode_permission(inode, MAY_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) static int cgroup_procs_write_permission(struct cgroup *src_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) struct cgroup *dst_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) struct cgroup_namespace *ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) struct cgroup *com_cgrp = src_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) /* find the common ancestor */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) while (!cgroup_is_descendant(dst_cgrp, com_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) com_cgrp = cgroup_parent(com_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) /* %current should be authorized to migrate to the common ancestor */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) ret = cgroup_may_write(com_cgrp, sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) * If namespaces are delegation boundaries, %current must be able
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) * to see both source and destination cgroups from its namespace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) if ((cgrp_dfl_root.flags & CGRP_ROOT_NS_DELEGATE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) (!cgroup_is_descendant(src_cgrp, ns->root_cset->dfl_cgrp) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) !cgroup_is_descendant(dst_cgrp, ns->root_cset->dfl_cgrp)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) static int cgroup_attach_permissions(struct cgroup *src_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) struct cgroup *dst_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) struct super_block *sb, bool threadgroup,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813) struct cgroup_namespace *ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) ret = cgroup_procs_write_permission(src_cgrp, dst_cgrp, sb, ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) ret = cgroup_migrate_vet_dst(dst_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) if (!threadgroup && (src_cgrp->dom_cgrp != dst_cgrp->dom_cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831) static ssize_t cgroup_procs_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) struct cgroup_file_ctx *ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) struct cgroup *src_cgrp, *dst_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) bool locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) dst_cgrp = cgroup_kn_lock_live(of->kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) if (!dst_cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) task = cgroup_procs_write_start(buf, true, &locked, dst_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) ret = PTR_ERR_OR_ZERO(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) /* find the source cgroup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) of->file->f_path.dentry->d_sb, true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) ctx->ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) ret = cgroup_attach_task(dst_cgrp, task, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) cgroup_procs_write_finish(task, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) static void *cgroup_threads_start(struct seq_file *s, loff_t *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) return __cgroup_procs_start(s, pos, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) static ssize_t cgroup_threads_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) struct cgroup_file_ctx *ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) struct cgroup *src_cgrp, *dst_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) bool locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) buf = strstrip(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) dst_cgrp = cgroup_kn_lock_live(of->kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) if (!dst_cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) task = cgroup_procs_write_start(buf, false, &locked, dst_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) ret = PTR_ERR_OR_ZERO(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) /* find the source cgroup */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) src_cgrp = task_cgroup_from_root(task, &cgrp_dfl_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) /* thread migrations follow the cgroup.procs delegation rule */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) ret = cgroup_attach_permissions(src_cgrp, dst_cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) of->file->f_path.dentry->d_sb, false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) ctx->ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) ret = cgroup_attach_task(dst_cgrp, task, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) cgroup_procs_write_finish(task, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) /* cgroup core interface files for the default hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) static struct cftype cgroup_base_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) .name = "cgroup.type",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) .flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) .seq_show = cgroup_type_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) .write = cgroup_type_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) .name = "cgroup.procs",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) .flags = CFTYPE_NS_DELEGATABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) .file_offset = offsetof(struct cgroup, procs_file),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) .release = cgroup_procs_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) .seq_start = cgroup_procs_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) .seq_next = cgroup_procs_next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) .seq_show = cgroup_procs_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) .write = cgroup_procs_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) .name = "cgroup.threads",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) .flags = CFTYPE_NS_DELEGATABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) .release = cgroup_procs_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) .seq_start = cgroup_threads_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) .seq_next = cgroup_procs_next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) .seq_show = cgroup_procs_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) .write = cgroup_threads_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) .name = "cgroup.controllers",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) .seq_show = cgroup_controllers_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) .name = "cgroup.subtree_control",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) .flags = CFTYPE_NS_DELEGATABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) .seq_show = cgroup_subtree_control_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) .write = cgroup_subtree_control_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) .name = "cgroup.events",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) .flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) .file_offset = offsetof(struct cgroup, events_file),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) .seq_show = cgroup_events_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) .name = "cgroup.max.descendants",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) .seq_show = cgroup_max_descendants_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) .write = cgroup_max_descendants_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) .name = "cgroup.max.depth",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) .seq_show = cgroup_max_depth_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) .write = cgroup_max_depth_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) .name = "cgroup.stat",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) .seq_show = cgroup_stat_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) .name = "cgroup.freeze",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) .flags = CFTYPE_NOT_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) .seq_show = cgroup_freeze_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) .write = cgroup_freeze_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) .name = "cpu.stat",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) .seq_show = cpu_stat_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) #ifdef CONFIG_PSI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) .name = "io.pressure",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) .flags = CFTYPE_PRESSURE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) .seq_show = cgroup_io_pressure_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) .write = cgroup_io_pressure_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) .poll = cgroup_pressure_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) .release = cgroup_pressure_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) .name = "memory.pressure",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) .flags = CFTYPE_PRESSURE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) .seq_show = cgroup_memory_pressure_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) .write = cgroup_memory_pressure_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) .poll = cgroup_pressure_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) .release = cgroup_pressure_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) .name = "cpu.pressure",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) .flags = CFTYPE_PRESSURE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) .seq_show = cgroup_cpu_pressure_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) .write = cgroup_cpu_pressure_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) .poll = cgroup_pressure_poll,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) .release = cgroup_pressure_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) #endif /* CONFIG_PSI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) { } /* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) * css destruction is four-stage process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) * 1. Destruction starts. Killing of the percpu_ref is initiated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) * Implemented in kill_css().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) * and thus css_tryget_online() is guaranteed to fail, the css can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) * offlined by invoking offline_css(). After offlining, the base ref is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) * put. Implemented in css_killed_work_fn().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) * 3. When the percpu_ref reaches zero, the only possible remaining
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) * accessors are inside RCU read sections. css_release() schedules the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) * RCU callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) * 4. After the grace period, the css can be freed. Implemented in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) * css_free_work_fn().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) * It is actually hairier because both step 2 and 4 require process context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) * and thus involve punting to css->destroy_work adding two additional
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) * steps to the already complex sequence.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) static void css_free_rwork_fn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) struct cgroup_subsys_state *css = container_of(to_rcu_work(work),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) struct cgroup_subsys_state, destroy_rwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) struct cgroup_subsys *ss = css->ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) struct cgroup *cgrp = css->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) percpu_ref_exit(&css->refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) if (ss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) /* css free path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) struct cgroup_subsys_state *parent = css->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) int id = css->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) ss->css_free(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) cgroup_idr_remove(&ss->css_idr, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) cgroup_put(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) if (parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) css_put(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) /* cgroup free path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) atomic_dec(&cgrp->root->nr_cgrps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) cgroup1_pidlist_destroy_all(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) cancel_work_sync(&cgrp->release_agent_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) if (cgroup_parent(cgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) * We get a ref to the parent, and put the ref when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) * this cgroup is being freed, so it's guaranteed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) * that the parent won't be destroyed before its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) * children.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) cgroup_put(cgroup_parent(cgrp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) kernfs_put(cgrp->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) psi_cgroup_free(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) if (cgroup_on_dfl(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) cgroup_rstat_exit(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) kfree(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) * This is root cgroup's refcnt reaching zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) * which indicates that the root should be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) * released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) cgroup_destroy_root(cgrp->root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) static void css_release_work_fn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) struct cgroup_subsys_state *css =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) container_of(work, struct cgroup_subsys_state, destroy_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) struct cgroup_subsys *ss = css->ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) struct cgroup *cgrp = css->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) css->flags |= CSS_RELEASED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) list_del_rcu(&css->sibling);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) if (ss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) /* css release path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) if (!list_empty(&css->rstat_css_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) cgroup_rstat_flush(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) list_del_rcu(&css->rstat_css_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) cgroup_idr_replace(&ss->css_idr, NULL, css->id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) if (ss->css_released)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) ss->css_released(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) struct cgroup *tcgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) /* cgroup release path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) TRACE_CGROUP_PATH(release, cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) if (cgroup_on_dfl(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) cgroup_rstat_flush(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) for (tcgrp = cgroup_parent(cgrp); tcgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) tcgrp = cgroup_parent(tcgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) tcgrp->nr_dying_descendants--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) * There are two control paths which try to determine
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) * cgroup from dentry without going through kernfs -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) * cgroupstats_build() and css_tryget_online_from_dir().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) * Those are supported by RCU protecting clearing of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) * cgrp->kn->priv backpointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) if (cgrp->kn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) RCU_INIT_POINTER(*(void __rcu __force **)&cgrp->kn->priv,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) static void css_release(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) struct cgroup_subsys_state *css =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) container_of(ref, struct cgroup_subsys_state, refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) INIT_WORK(&css->destroy_work, css_release_work_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) queue_work(cgroup_destroy_wq, &css->destroy_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) static void init_and_link_css(struct cgroup_subsys_state *css,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) struct cgroup_subsys *ss, struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) cgroup_get_live(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) memset(css, 0, sizeof(*css));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) css->cgroup = cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) css->ss = ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) css->id = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) INIT_LIST_HEAD(&css->sibling);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) INIT_LIST_HEAD(&css->children);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) INIT_LIST_HEAD(&css->rstat_css_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) css->serial_nr = css_serial_nr_next++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) atomic_set(&css->online_cnt, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) if (cgroup_parent(cgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) css->parent = cgroup_css(cgroup_parent(cgrp), ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) css_get(css->parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) if (cgroup_on_dfl(cgrp) && ss->css_rstat_flush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) list_add_rcu(&css->rstat_css_node, &cgrp->rstat_css_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) BUG_ON(cgroup_css(cgrp, ss));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) /* invoke ->css_online() on a new CSS and mark it online if successful */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) static int online_css(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) struct cgroup_subsys *ss = css->ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) if (ss->css_online)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) ret = ss->css_online(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) css->flags |= CSS_ONLINE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) rcu_assign_pointer(css->cgroup->subsys[ss->id], css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) atomic_inc(&css->online_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) if (css->parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) atomic_inc(&css->parent->online_cnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) /* if the CSS is online, invoke ->css_offline() on it and mark it offline */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) static void offline_css(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) struct cgroup_subsys *ss = css->ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) if (!(css->flags & CSS_ONLINE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) if (ss->css_offline)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) ss->css_offline(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) css->flags &= ~CSS_ONLINE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) RCU_INIT_POINTER(css->cgroup->subsys[ss->id], NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) wake_up_all(&css->cgroup->offline_waitq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) * css_create - create a cgroup_subsys_state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) * @cgrp: the cgroup new css will be associated with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) * @ss: the subsys of new css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) * Create a new css associated with @cgrp - @ss pair. On success, the new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) * css is online and installed in @cgrp. This function doesn't create the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) * interface files. Returns 0 on success, -errno on failure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5225) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5226) static struct cgroup_subsys_state *css_create(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5227) struct cgroup_subsys *ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5228) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5229) struct cgroup *parent = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5230) struct cgroup_subsys_state *parent_css = cgroup_css(parent, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5231) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5232) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5234) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5236) css = ss->css_alloc(parent_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5237) if (!css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5238) css = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5239) if (IS_ERR(css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5240) return css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5242) init_and_link_css(css, ss, cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5244) err = percpu_ref_init(&css->refcnt, css_release, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5245) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5246) goto err_free_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5248) err = cgroup_idr_alloc(&ss->css_idr, NULL, 2, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5249) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5250) goto err_free_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5251) css->id = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5253) /* @css is ready to be brought online now, make it visible */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5254) list_add_tail_rcu(&css->sibling, &parent_css->children);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5255) cgroup_idr_replace(&ss->css_idr, css, css->id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5257) err = online_css(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5258) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5259) goto err_list_del;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5261) if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5262) cgroup_parent(parent)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5263) pr_warn("%s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5264) current->comm, current->pid, ss->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5265) if (!strcmp(ss->name, "memory"))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5266) pr_warn("\"memory\" requires setting use_hierarchy to 1 on the root\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5267) ss->warned_broken_hierarchy = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5270) return css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5272) err_list_del:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5273) list_del_rcu(&css->sibling);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5274) err_free_css:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5275) list_del_rcu(&css->rstat_css_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5276) INIT_RCU_WORK(&css->destroy_rwork, css_free_rwork_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5277) queue_rcu_work(cgroup_destroy_wq, &css->destroy_rwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5278) return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5281) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5282) * The returned cgroup is fully initialized including its control mask, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5283) * it isn't associated with its kernfs_node and doesn't have the control
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5284) * mask applied.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5286) static struct cgroup *cgroup_create(struct cgroup *parent, const char *name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5287) umode_t mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5288) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5289) struct cgroup_root *root = parent->root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5290) struct cgroup *cgrp, *tcgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5291) struct kernfs_node *kn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5292) int level = parent->level + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5293) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5295) /* allocate the cgroup and its ID, 0 is reserved for the root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5296) cgrp = kzalloc(struct_size(cgrp, ancestor_ids, (level + 1)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5297) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5298) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5299) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5301) ret = percpu_ref_init(&cgrp->self.refcnt, css_release, 0, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5302) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5303) goto out_free_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5305) if (cgroup_on_dfl(parent)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5306) ret = cgroup_rstat_init(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5307) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5308) goto out_cancel_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5311) /* create the directory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5312) kn = kernfs_create_dir(parent->kn, name, mode, cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5313) if (IS_ERR(kn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5314) ret = PTR_ERR(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5315) goto out_stat_exit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5317) cgrp->kn = kn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5319) init_cgroup_housekeeping(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5321) cgrp->self.parent = &parent->self;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5322) cgrp->root = root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5323) cgrp->level = level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5325) ret = psi_cgroup_alloc(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5326) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5327) goto out_kernfs_remove;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5329) ret = cgroup_bpf_inherit(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5330) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5331) goto out_psi_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5333) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5334) * New cgroup inherits effective freeze counter, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5335) * if the parent has to be frozen, the child has too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5336) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5337) cgrp->freezer.e_freeze = parent->freezer.e_freeze;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5338) if (cgrp->freezer.e_freeze) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5339) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5340) * Set the CGRP_FREEZE flag, so when a process will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5341) * attached to the child cgroup, it will become frozen.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5342) * At this point the new cgroup is unpopulated, so we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5343) * consider it frozen immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5344) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5345) set_bit(CGRP_FREEZE, &cgrp->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5346) set_bit(CGRP_FROZEN, &cgrp->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5349) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5350) for (tcgrp = cgrp; tcgrp; tcgrp = cgroup_parent(tcgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5351) cgrp->ancestor_ids[tcgrp->level] = cgroup_id(tcgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5353) if (tcgrp != cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5354) tcgrp->nr_descendants++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5356) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5357) * If the new cgroup is frozen, all ancestor cgroups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5358) * get a new frozen descendant, but their state can't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5359) * change because of this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5360) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5361) if (cgrp->freezer.e_freeze)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5362) tcgrp->freezer.nr_frozen_descendants++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5365) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5367) if (notify_on_release(parent))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5368) set_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5370) if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &parent->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5371) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &cgrp->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5373) cgrp->self.serial_nr = css_serial_nr_next++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5375) /* allocation complete, commit to creation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5376) list_add_tail_rcu(&cgrp->self.sibling, &cgroup_parent(cgrp)->self.children);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5377) atomic_inc(&root->nr_cgrps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5378) cgroup_get_live(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5380) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5381) * On the default hierarchy, a child doesn't automatically inherit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5382) * subtree_control from the parent. Each is configured manually.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5383) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5384) if (!cgroup_on_dfl(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5385) cgrp->subtree_control = cgroup_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5387) cgroup_propagate_control(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5389) return cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5391) out_psi_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5392) psi_cgroup_free(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5393) out_kernfs_remove:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5394) kernfs_remove(cgrp->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5395) out_stat_exit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5396) if (cgroup_on_dfl(parent))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5397) cgroup_rstat_exit(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5398) out_cancel_ref:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5399) percpu_ref_exit(&cgrp->self.refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5400) out_free_cgrp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5401) kfree(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5402) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5405) static bool cgroup_check_hierarchy_limits(struct cgroup *parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5406) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5407) struct cgroup *cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5408) int ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5409) int level = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5411) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5413) for (cgroup = parent; cgroup; cgroup = cgroup_parent(cgroup)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5414) if (cgroup->nr_descendants >= cgroup->max_descendants)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5415) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5417) if (level > cgroup->max_depth)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5418) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5420) level++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5423) ret = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5424) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5425) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5426) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5428) int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, umode_t mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5430) struct cgroup *parent, *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5431) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5433) /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5434) if (strchr(name, '\n'))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5435) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5437) parent = cgroup_kn_lock_live(parent_kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5438) if (!parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5439) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5441) if (!cgroup_check_hierarchy_limits(parent)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5442) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5443) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5444) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5446) cgrp = cgroup_create(parent, name, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5447) if (IS_ERR(cgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5448) ret = PTR_ERR(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5449) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5450) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5452) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5453) * This extra ref will be put in cgroup_free_fn() and guarantees
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5454) * that @cgrp->kn is always accessible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5455) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5456) kernfs_get(cgrp->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5458) ret = cgroup_kn_set_ugid(cgrp->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5459) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5460) goto out_destroy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5462) ret = css_populate_dir(&cgrp->self);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5463) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5464) goto out_destroy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5466) ret = cgroup_apply_control_enable(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5467) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5468) goto out_destroy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5470) TRACE_CGROUP_PATH(mkdir, cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5472) /* let's create and online css's */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5473) kernfs_activate(cgrp->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5475) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5476) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5478) out_destroy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5479) cgroup_destroy_locked(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5480) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5481) cgroup_kn_unlock(parent_kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5482) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5485) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5486) * This is called when the refcnt of a css is confirmed to be killed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5487) * css_tryget_online() is now guaranteed to fail. Tell the subsystem to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5488) * initate destruction and put the css ref from kill_css().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5489) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5490) static void css_killed_work_fn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5491) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5492) struct cgroup_subsys_state *css =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5493) container_of(work, struct cgroup_subsys_state, destroy_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5495) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5497) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5498) offline_css(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5499) css_put(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5500) /* @css can't go away while we're holding cgroup_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5501) css = css->parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5502) } while (css && atomic_dec_and_test(&css->online_cnt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5504) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5507) /* css kill confirmation processing requires process context, bounce */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5508) static void css_killed_ref_fn(struct percpu_ref *ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5509) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5510) struct cgroup_subsys_state *css =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5511) container_of(ref, struct cgroup_subsys_state, refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5513) if (atomic_dec_and_test(&css->online_cnt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5514) INIT_WORK(&css->destroy_work, css_killed_work_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5515) queue_work(cgroup_destroy_wq, &css->destroy_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5516) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5519) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5520) * kill_css - destroy a css
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5521) * @css: css to destroy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5522) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5523) * This function initiates destruction of @css by removing cgroup interface
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5524) * files and putting its base reference. ->css_offline() will be invoked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5525) * asynchronously once css_tryget_online() is guaranteed to fail and when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5526) * the reference count reaches zero, @css will be released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5527) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5528) static void kill_css(struct cgroup_subsys_state *css)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5529) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5530) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5532) if (css->flags & CSS_DYING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5533) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5535) css->flags |= CSS_DYING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5537) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5538) * This must happen before css is disassociated with its cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5539) * See seq_css() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5540) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5541) css_clear_dir(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5543) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5544) * Killing would put the base ref, but we need to keep it alive
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5545) * until after ->css_offline().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5546) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5547) css_get(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5549) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5550) * cgroup core guarantees that, by the time ->css_offline() is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5551) * invoked, no new css reference will be given out via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5552) * css_tryget_online(). We can't simply call percpu_ref_kill() and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5553) * proceed to offlining css's because percpu_ref_kill() doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5554) * guarantee that the ref is seen as killed on all CPUs on return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5555) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5556) * Use percpu_ref_kill_and_confirm() to get notifications as each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5557) * css is confirmed to be seen as killed on all CPUs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5558) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5559) percpu_ref_kill_and_confirm(&css->refcnt, css_killed_ref_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5562) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5563) * cgroup_destroy_locked - the first stage of cgroup destruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5564) * @cgrp: cgroup to be destroyed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5565) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5566) * css's make use of percpu refcnts whose killing latency shouldn't be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5567) * exposed to userland and are RCU protected. Also, cgroup core needs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5568) * guarantee that css_tryget_online() won't succeed by the time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5569) * ->css_offline() is invoked. To satisfy all the requirements,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5570) * destruction is implemented in the following two steps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5571) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5572) * s1. Verify @cgrp can be destroyed and mark it dying. Remove all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5573) * userland visible parts and start killing the percpu refcnts of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5574) * css's. Set up so that the next stage will be kicked off once all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5575) * the percpu refcnts are confirmed to be killed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5576) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5577) * s2. Invoke ->css_offline(), mark the cgroup dead and proceed with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5578) * rest of destruction. Once all cgroup references are gone, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5579) * cgroup is RCU-freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5580) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5581) * This function implements s1. After this step, @cgrp is gone as far as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5582) * the userland is concerned and a new cgroup with the same name may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5583) * created. As cgroup doesn't care about the names internally, this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5584) * doesn't cause any problem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5585) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5586) static int cgroup_destroy_locked(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5587) __releases(&cgroup_mutex) __acquires(&cgroup_mutex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5588) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5589) struct cgroup *tcgrp, *parent = cgroup_parent(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5590) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5591) struct cgrp_cset_link *link;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5592) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5594) lockdep_assert_held(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5596) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5597) * Only migration can raise populated from zero and we're already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5598) * holding cgroup_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5599) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5600) if (cgroup_is_populated(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5601) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5603) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5604) * Make sure there's no live children. We can't test emptiness of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5605) * ->self.children as dead children linger on it while being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5606) * drained; otherwise, "rmdir parent/child parent" may fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5607) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5608) if (css_has_online_children(&cgrp->self))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5609) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5611) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5612) * Mark @cgrp and the associated csets dead. The former prevents
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5613) * further task migration and child creation by disabling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5614) * cgroup_lock_live_group(). The latter makes the csets ignored by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5615) * the migration path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5616) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5617) cgrp->self.flags &= ~CSS_ONLINE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5619) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5620) list_for_each_entry(link, &cgrp->cset_links, cset_link)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5621) link->cset->dead = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5622) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5624) /* initiate massacre of all css's */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5625) for_each_css(css, ssid, cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5626) kill_css(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5628) /* clear and remove @cgrp dir, @cgrp has an extra ref on its kn */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5629) css_clear_dir(&cgrp->self);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5630) kernfs_remove(cgrp->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5632) if (parent && cgroup_is_threaded(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5633) parent->nr_threaded_children--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5635) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5636) for (tcgrp = cgroup_parent(cgrp); tcgrp; tcgrp = cgroup_parent(tcgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5637) tcgrp->nr_descendants--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5638) tcgrp->nr_dying_descendants++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5639) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5640) * If the dying cgroup is frozen, decrease frozen descendants
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5641) * counters of ancestor cgroups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5642) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5643) if (test_bit(CGRP_FROZEN, &cgrp->flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5644) tcgrp->freezer.nr_frozen_descendants--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5646) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5648) cgroup1_check_for_release(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5650) cgroup_bpf_offline(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5652) /* put the base reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5653) percpu_ref_kill(&cgrp->self.refcnt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5655) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5656) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5658) int cgroup_rmdir(struct kernfs_node *kn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5659) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5660) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5661) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5663) cgrp = cgroup_kn_lock_live(kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5664) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5665) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5667) ret = cgroup_destroy_locked(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5668) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5669) TRACE_CGROUP_PATH(rmdir, cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5671) cgroup_kn_unlock(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5672) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5675) static struct kernfs_syscall_ops cgroup_kf_syscall_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5676) .show_options = cgroup_show_options,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5677) .mkdir = cgroup_mkdir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5678) .rmdir = cgroup_rmdir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5679) .show_path = cgroup_show_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5680) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5682) static void __init cgroup_init_subsys(struct cgroup_subsys *ss, bool early)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5684) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5686) pr_debug("Initializing cgroup subsys %s\n", ss->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5688) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5690) idr_init(&ss->css_idr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5691) INIT_LIST_HEAD(&ss->cfts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5693) /* Create the root cgroup state for this subsystem */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5694) ss->root = &cgrp_dfl_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5695) css = ss->css_alloc(cgroup_css(&cgrp_dfl_root.cgrp, ss));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5696) /* We don't handle early failures gracefully */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5697) BUG_ON(IS_ERR(css));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5698) init_and_link_css(css, ss, &cgrp_dfl_root.cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5700) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5701) * Root csses are never destroyed and we can't initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5702) * percpu_ref during early init. Disable refcnting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5703) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5704) css->flags |= CSS_NO_REF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5706) if (early) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5707) /* allocation can't be done safely during early init */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5708) css->id = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5709) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5710) css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5711) BUG_ON(css->id < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5714) /* Update the init_css_set to contain a subsys
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5715) * pointer to this state - since the subsystem is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5716) * newly registered, all tasks and hence the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5717) * init_css_set is in the subsystem's root cgroup. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5718) init_css_set.subsys[ss->id] = css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5720) have_fork_callback |= (bool)ss->fork << ss->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5721) have_exit_callback |= (bool)ss->exit << ss->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5722) have_release_callback |= (bool)ss->release << ss->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5723) have_canfork_callback |= (bool)ss->can_fork << ss->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5725) /* At system boot, before all subsystems have been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5726) * registered, no tasks have been forked, so we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5727) * need to invoke fork callbacks here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5728) BUG_ON(!list_empty(&init_task.tasks));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5730) BUG_ON(online_css(css));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5732) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5735) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5736) * cgroup_init_early - cgroup initialization at system boot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5737) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5738) * Initialize cgroups at system boot, and initialize any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5739) * subsystems that request early init.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5740) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5741) int __init cgroup_init_early(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5742) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5743) static struct cgroup_fs_context __initdata ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5744) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5745) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5747) ctx.root = &cgrp_dfl_root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5748) init_cgroup_root(&ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5749) cgrp_dfl_root.cgrp.self.flags |= CSS_NO_REF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5750)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5751) RCU_INIT_POINTER(init_task.cgroups, &init_css_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5753) for_each_subsys(ss, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5754) WARN(!ss->css_alloc || !ss->css_free || ss->name || ss->id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5755) "invalid cgroup_subsys %d:%s css_alloc=%p css_free=%p id:name=%d:%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5756) i, cgroup_subsys_name[i], ss->css_alloc, ss->css_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5757) ss->id, ss->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5758) WARN(strlen(cgroup_subsys_name[i]) > MAX_CGROUP_TYPE_NAMELEN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5759) "cgroup_subsys_name %s too long\n", cgroup_subsys_name[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5761) ss->id = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5762) ss->name = cgroup_subsys_name[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5763) if (!ss->legacy_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5764) ss->legacy_name = cgroup_subsys_name[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5766) if (ss->early_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5767) cgroup_init_subsys(ss, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5769) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5772) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5773) * cgroup_init - cgroup initialization
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5774) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5775) * Register cgroup filesystem and /proc file, and initialize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5776) * any subsystems that didn't request early init.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5777) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5778) int __init cgroup_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5779) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5780) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5781) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5783) BUILD_BUG_ON(CGROUP_SUBSYS_COUNT > 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5784) BUG_ON(cgroup_init_cftypes(NULL, cgroup_base_files));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5785) BUG_ON(cgroup_init_cftypes(NULL, cgroup1_base_files));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5787) cgroup_rstat_boot();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5789) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5790) * The latency of the synchronize_rcu() is too high for cgroups,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5791) * avoid it at the cost of forcing all readers into the slow path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5792) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5793) rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5795) get_user_ns(init_cgroup_ns.user_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5797) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5799) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5800) * Add init_css_set to the hash table so that dfl_root can link to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5801) * it during init.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5802) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5803) hash_add(css_set_table, &init_css_set.hlist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5804) css_set_hash(init_css_set.subsys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5806) BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5808) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5810) for_each_subsys(ss, ssid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5811) if (ss->early_init) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5812) struct cgroup_subsys_state *css =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5813) init_css_set.subsys[ss->id];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5815) css->id = cgroup_idr_alloc(&ss->css_idr, css, 1, 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5816) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5817) BUG_ON(css->id < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5818) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5819) cgroup_init_subsys(ss, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5820) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5822) list_add_tail(&init_css_set.e_cset_node[ssid],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5823) &cgrp_dfl_root.cgrp.e_csets[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5825) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5826) * Setting dfl_root subsys_mask needs to consider the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5827) * disabled flag and cftype registration needs kmalloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5828) * both of which aren't available during early_init.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5829) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5830) if (!cgroup_ssid_enabled(ssid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5831) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5833) if (cgroup1_ssid_disabled(ssid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5834) printk(KERN_INFO "Disabling %s control group subsystem in v1 mounts\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5835) ss->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5837) cgrp_dfl_root.subsys_mask |= 1 << ss->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5839) /* implicit controllers must be threaded too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5840) WARN_ON(ss->implicit_on_dfl && !ss->threaded);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5842) if (ss->implicit_on_dfl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5843) cgrp_dfl_implicit_ss_mask |= 1 << ss->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5844) else if (!ss->dfl_cftypes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5845) cgrp_dfl_inhibit_ss_mask |= 1 << ss->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5847) if (ss->threaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5848) cgrp_dfl_threaded_ss_mask |= 1 << ss->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5850) if (ss->dfl_cftypes == ss->legacy_cftypes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5851) WARN_ON(cgroup_add_cftypes(ss, ss->dfl_cftypes));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5852) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5853) WARN_ON(cgroup_add_dfl_cftypes(ss, ss->dfl_cftypes));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5854) WARN_ON(cgroup_add_legacy_cftypes(ss, ss->legacy_cftypes));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5855) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5857) if (ss->bind)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5858) ss->bind(init_css_set.subsys[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5860) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5861) css_populate_dir(init_css_set.subsys[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5862) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5863) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5864)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5865) /* init_css_set.subsys[] has been updated, re-hash */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5866) hash_del(&init_css_set.hlist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5867) hash_add(css_set_table, &init_css_set.hlist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5868) css_set_hash(init_css_set.subsys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5870) WARN_ON(sysfs_create_mount_point(fs_kobj, "cgroup"));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5871) WARN_ON(register_filesystem(&cgroup_fs_type));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5872) WARN_ON(register_filesystem(&cgroup2_fs_type));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5873) WARN_ON(!proc_create_single("cgroups", 0, NULL, proc_cgroupstats_show));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5874) #ifdef CONFIG_CPUSETS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5875) WARN_ON(register_filesystem(&cpuset_fs_type));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5876) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5878) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5881) static int __init cgroup_wq_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5882) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5883) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5884) * There isn't much point in executing destruction path in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5885) * parallel. Good chunk is serialized with cgroup_mutex anyway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5886) * Use 1 for @max_active.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5887) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5888) * We would prefer to do this in cgroup_init() above, but that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5889) * is called before init_workqueues(): so leave this until after.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5890) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5891) cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5892) BUG_ON(!cgroup_destroy_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5893) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5895) core_initcall(cgroup_wq_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5897) void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5899) struct kernfs_node *kn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5901) kn = kernfs_find_and_get_node_by_id(cgrp_dfl_root.kf_root, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5902) if (!kn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5903) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5904) kernfs_path(kn, buf, buflen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5905) kernfs_put(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5906) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5908) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5909) * proc_cgroup_show()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5910) * - Print task's cgroup paths into seq_file, one line for each hierarchy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5911) * - Used for /proc/<pid>/cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5912) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5913) int proc_cgroup_show(struct seq_file *m, struct pid_namespace *ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5914) struct pid *pid, struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5915) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5916) char *buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5917) int retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5918) struct cgroup_root *root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5920) retval = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5921) buf = kmalloc(PATH_MAX, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5922) if (!buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5923) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5925) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5926) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5928) for_each_root(root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5929) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5930) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5931) int ssid, count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5933) if (root == &cgrp_dfl_root && !cgrp_dfl_visible)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5934) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5936) seq_printf(m, "%d:", root->hierarchy_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5937) if (root != &cgrp_dfl_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5938) for_each_subsys(ss, ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5939) if (root->subsys_mask & (1 << ssid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5940) seq_printf(m, "%s%s", count++ ? "," : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5941) ss->legacy_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5942) if (strlen(root->name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5943) seq_printf(m, "%sname=%s", count ? "," : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5944) root->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5945) seq_putc(m, ':');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5947) cgrp = task_cgroup_from_root(tsk, root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5949) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5950) * On traditional hierarchies, all zombie tasks show up as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5951) * belonging to the root cgroup. On the default hierarchy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5952) * while a zombie doesn't show up in "cgroup.procs" and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5953) * thus can't be migrated, its /proc/PID/cgroup keeps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5954) * reporting the cgroup it belonged to before exiting. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5955) * the cgroup is removed before the zombie is reaped,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5956) * " (deleted)" is appended to the cgroup path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5957) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5958) if (cgroup_on_dfl(cgrp) || !(tsk->flags & PF_EXITING)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5959) retval = cgroup_path_ns_locked(cgrp, buf, PATH_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5960) current->nsproxy->cgroup_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5961) if (retval >= PATH_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5962) retval = -ENAMETOOLONG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5963) if (retval < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5964) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5966) seq_puts(m, buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5967) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5968) seq_puts(m, "/");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5971) if (cgroup_on_dfl(cgrp) && cgroup_is_dead(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5972) seq_puts(m, " (deleted)\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5973) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5974) seq_putc(m, '\n');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5977) retval = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5978) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5979) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5980) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5981) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5982) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5983) return retval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5985)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5986) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5987) * cgroup_fork - initialize cgroup related fields during copy_process()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5988) * @child: pointer to task_struct of forking parent process.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5989) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5990) * A task is associated with the init_css_set until cgroup_post_fork()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5991) * attaches it to the target css_set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5992) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5993) void cgroup_fork(struct task_struct *child)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5994) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5995) RCU_INIT_POINTER(child->cgroups, &init_css_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5996) INIT_LIST_HEAD(&child->cg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5997) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5998)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5999) static struct cgroup *cgroup_get_from_file(struct file *f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6001) struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6002) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6004) css = css_tryget_online_from_dir(f->f_path.dentry, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6005) if (IS_ERR(css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6006) return ERR_CAST(css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6007)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6008) cgrp = css->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6009) if (!cgroup_on_dfl(cgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6010) cgroup_put(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6011) return ERR_PTR(-EBADF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6013)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6014) return cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6017) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6018) * cgroup_css_set_fork - find or create a css_set for a child process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6019) * @kargs: the arguments passed to create the child process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6020) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6021) * This functions finds or creates a new css_set which the child
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6022) * process will be attached to in cgroup_post_fork(). By default,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6023) * the child process will be given the same css_set as its parent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6024) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6025) * If CLONE_INTO_CGROUP is specified this function will try to find an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6026) * existing css_set which includes the requested cgroup and if not create
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6027) * a new css_set that the child will be attached to later. If this function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6028) * succeeds it will hold cgroup_threadgroup_rwsem on return. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6029) * CLONE_INTO_CGROUP is requested this function will grab cgroup mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6030) * before grabbing cgroup_threadgroup_rwsem and will hold a reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6031) * to the target cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6032) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6033) static int cgroup_css_set_fork(struct kernel_clone_args *kargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6034) __acquires(&cgroup_mutex) __acquires(&cgroup_threadgroup_rwsem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6035) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6036) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6037) struct cgroup *dst_cgrp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6038) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6039) struct super_block *sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6040) struct file *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6042) if (kargs->flags & CLONE_INTO_CGROUP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6043) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6045) cgroup_threadgroup_change_begin(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6047) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6048) cset = task_css_set(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6049) get_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6050) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6052) if (!(kargs->flags & CLONE_INTO_CGROUP)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6053) kargs->cset = cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6054) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6055) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6057) f = fget_raw(kargs->cgroup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6058) if (!f) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6059) ret = -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6060) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6062) sb = f->f_path.dentry->d_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6064) dst_cgrp = cgroup_get_from_file(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6065) if (IS_ERR(dst_cgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6066) ret = PTR_ERR(dst_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6067) dst_cgrp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6068) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6071) if (cgroup_is_dead(dst_cgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6072) ret = -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6073) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6074) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6076) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6077) * Verify that we the target cgroup is writable for us. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6078) * usually done by the vfs layer but since we're not going through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6079) * the vfs layer here we need to do it "manually".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6080) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6081) ret = cgroup_may_write(dst_cgrp, sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6082) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6083) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6085) ret = cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6086) !(kargs->flags & CLONE_THREAD),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6087) current->nsproxy->cgroup_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6088) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6089) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6091) kargs->cset = find_css_set(cset, dst_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6092) if (!kargs->cset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6093) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6094) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6097) put_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6098) fput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6099) kargs->cgrp = dst_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6100) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6102) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6103) cgroup_threadgroup_change_end(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6104) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6105) if (f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6106) fput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6107) if (dst_cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6108) cgroup_put(dst_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6109) put_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6110) if (kargs->cset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6111) put_css_set(kargs->cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6112) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6115) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6116) * cgroup_css_set_put_fork - drop references we took during fork
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6117) * @kargs: the arguments passed to create the child process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6118) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6119) * Drop references to the prepared css_set and target cgroup if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6120) * CLONE_INTO_CGROUP was requested.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6121) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6122) static void cgroup_css_set_put_fork(struct kernel_clone_args *kargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6123) __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6124) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6125) cgroup_threadgroup_change_end(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6127) if (kargs->flags & CLONE_INTO_CGROUP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6128) struct cgroup *cgrp = kargs->cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6129) struct css_set *cset = kargs->cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6131) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6133) if (cset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6134) put_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6135) kargs->cset = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6138) if (cgrp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6139) cgroup_put(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6140) kargs->cgrp = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6145) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6146) * cgroup_can_fork - called on a new task before the process is exposed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6147) * @child: the child process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6148) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6149) * This prepares a new css_set for the child process which the child will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6150) * be attached to in cgroup_post_fork().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6151) * This calls the subsystem can_fork() callbacks. If the cgroup_can_fork()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6152) * callback returns an error, the fork aborts with that error code. This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6153) * allows for a cgroup subsystem to conditionally allow or deny new forks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6154) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6155) int cgroup_can_fork(struct task_struct *child, struct kernel_clone_args *kargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6156) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6157) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6158) int i, j, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6160) ret = cgroup_css_set_fork(kargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6161) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6162) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6164) do_each_subsys_mask(ss, i, have_canfork_callback) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6165) ret = ss->can_fork(child, kargs->cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6166) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6167) goto out_revert;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6168) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6170) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6172) out_revert:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6173) for_each_subsys(ss, j) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6174) if (j >= i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6175) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6176) if (ss->cancel_fork)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6177) ss->cancel_fork(child, kargs->cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6180) cgroup_css_set_put_fork(kargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6182) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6185) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6186) * cgroup_cancel_fork - called if a fork failed after cgroup_can_fork()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6187) * @child: the child process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6188) * @kargs: the arguments passed to create the child process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6189) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6190) * This calls the cancel_fork() callbacks if a fork failed *after*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6191) * cgroup_can_fork() succeded and cleans up references we took to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6192) * prepare a new css_set for the child process in cgroup_can_fork().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6193) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6194) void cgroup_cancel_fork(struct task_struct *child,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6195) struct kernel_clone_args *kargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6197) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6198) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6200) for_each_subsys(ss, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6201) if (ss->cancel_fork)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6202) ss->cancel_fork(child, kargs->cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6204) cgroup_css_set_put_fork(kargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6207) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6208) * cgroup_post_fork - finalize cgroup setup for the child process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6209) * @child: the child process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6210) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6211) * Attach the child process to its css_set calling the subsystem fork()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6212) * callbacks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6213) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6214) void cgroup_post_fork(struct task_struct *child,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6215) struct kernel_clone_args *kargs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6216) __releases(&cgroup_threadgroup_rwsem) __releases(&cgroup_mutex)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6217) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6218) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6219) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6220) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6222) cset = kargs->cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6223) kargs->cset = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6225) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6227) /* init tasks are special, only link regular threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6228) if (likely(child->pid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6229) WARN_ON_ONCE(!list_empty(&child->cg_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6230) cset->nr_tasks++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6231) css_set_move_task(child, NULL, cset, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6232) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6233) put_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6234) cset = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6237) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6238) * If the cgroup has to be frozen, the new task has too. Let's set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6239) * the JOBCTL_TRAP_FREEZE jobctl bit to get the task into the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6240) * frozen state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6241) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6242) if (unlikely(cgroup_task_freeze(child))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6243) spin_lock(&child->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6244) WARN_ON_ONCE(child->frozen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6245) child->jobctl |= JOBCTL_TRAP_FREEZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6246) spin_unlock(&child->sighand->siglock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6248) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6249) * Calling cgroup_update_frozen() isn't required here,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6250) * because it will be called anyway a bit later from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6251) * do_freezer_trap(). So we avoid cgroup's transient switch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6252) * from the frozen state and back.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6253) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6254) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6256) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6258) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6259) * Call ss->fork(). This must happen after @child is linked on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6260) * css_set; otherwise, @child might change state between ->fork()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6261) * and addition to css_set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6262) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6263) do_each_subsys_mask(ss, i, have_fork_callback) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6264) ss->fork(child);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6265) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6267) /* Make the new cset the root_cset of the new cgroup namespace. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6268) if (kargs->flags & CLONE_NEWCGROUP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6269) struct css_set *rcset = child->nsproxy->cgroup_ns->root_cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6271) get_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6272) child->nsproxy->cgroup_ns->root_cset = cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6273) put_css_set(rcset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6274) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6276) cgroup_css_set_put_fork(kargs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6279) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6280) * cgroup_exit - detach cgroup from exiting task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6281) * @tsk: pointer to task_struct of exiting process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6282) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6283) * Description: Detach cgroup from @tsk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6284) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6285) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6286) void cgroup_exit(struct task_struct *tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6288) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6289) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6290) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6292) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6294) WARN_ON_ONCE(list_empty(&tsk->cg_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6295) cset = task_css_set(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6296) css_set_move_task(tsk, cset, NULL, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6297) list_add_tail(&tsk->cg_list, &cset->dying_tasks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6298) cset->nr_tasks--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6300) WARN_ON_ONCE(cgroup_task_frozen(tsk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6301) if (unlikely(cgroup_task_freeze(tsk)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6302) cgroup_update_frozen(task_dfl_cgroup(tsk));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6304) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6306) /* see cgroup_post_fork() for details */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6307) do_each_subsys_mask(ss, i, have_exit_callback) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6308) ss->exit(tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6309) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6312) void cgroup_release(struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6314) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6315) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6317) do_each_subsys_mask(ss, ssid, have_release_callback) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6318) ss->release(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6319) } while_each_subsys_mask();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6321) spin_lock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6322) css_set_skip_task_iters(task_css_set(task), task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6323) list_del_init(&task->cg_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6324) spin_unlock_irq(&css_set_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6327) void cgroup_free(struct task_struct *task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6328) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6329) struct css_set *cset = task_css_set(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6330) put_css_set(cset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6333) static int __init cgroup_disable(char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6334) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6335) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6336) char *token;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6337) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6339) while ((token = strsep(&str, ",")) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6340) if (!*token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6341) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6343) for_each_subsys(ss, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6344) if (strcmp(token, ss->name) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6345) strcmp(token, ss->legacy_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6346) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6348) static_branch_disable(cgroup_subsys_enabled_key[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6349) pr_info("Disabling %s control group subsystem\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6350) ss->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6353) for (i = 0; i < OPT_FEATURE_COUNT; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6354) if (strcmp(token, cgroup_opt_feature_names[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6355) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6356) cgroup_feature_disable_mask |= 1 << i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6357) pr_info("Disabling %s control group feature\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6358) cgroup_opt_feature_names[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6359) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6362) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6364) __setup("cgroup_disable=", cgroup_disable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6366) void __init __weak enable_debug_cgroup(void) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6368) static int __init enable_cgroup_debug(char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6369) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6370) cgroup_debug = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6371) enable_debug_cgroup();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6372) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6374) __setup("cgroup_debug", enable_cgroup_debug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6376) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6377) * css_tryget_online_from_dir - get corresponding css from a cgroup dentry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6378) * @dentry: directory dentry of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6379) * @ss: subsystem of interest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6380) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6381) * If @dentry is a directory for a cgroup which has @ss enabled on it, try
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6382) * to get the corresponding css and return it. If such css doesn't exist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6383) * or can't be pinned, an ERR_PTR value is returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6384) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6385) struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6386) struct cgroup_subsys *ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6387) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6388) struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6389) struct file_system_type *s_type = dentry->d_sb->s_type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6390) struct cgroup_subsys_state *css = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6391) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6393) /* is @dentry a cgroup dir? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6394) if ((s_type != &cgroup_fs_type && s_type != &cgroup2_fs_type) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6395) !kn || kernfs_type(kn) != KERNFS_DIR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6396) return ERR_PTR(-EBADF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6398) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6400) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6401) * This path doesn't originate from kernfs and @kn could already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6402) * have been or be removed at any point. @kn->priv is RCU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6403) * protected for this access. See css_release_work_fn() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6404) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6405) cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6406) if (cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6407) css = cgroup_css(cgrp, ss);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6409) if (!css || !css_tryget_online(css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6410) css = ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6412) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6413) return css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6416) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6417) * css_from_id - lookup css by id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6418) * @id: the cgroup id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6419) * @ss: cgroup subsys to be looked into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6420) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6421) * Returns the css if there's valid one with @id, otherwise returns NULL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6422) * Should be called under rcu_read_lock().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6423) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6424) struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6425) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6426) WARN_ON_ONCE(!rcu_read_lock_held());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6427) return idr_find(&ss->css_idr, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6430) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6431) * cgroup_get_from_path - lookup and get a cgroup from its default hierarchy path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6432) * @path: path on the default hierarchy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6433) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6434) * Find the cgroup at @path on the default hierarchy, increment its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6435) * reference count and return it. Returns pointer to the found cgroup on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6436) * success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6437) * if @path points to a non-directory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6438) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6439) struct cgroup *cgroup_get_from_path(const char *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6441) struct kernfs_node *kn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6442) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6444) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6445)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6446) kn = kernfs_walk_and_get(cgrp_dfl_root.cgrp.kn, path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6447) if (kn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6448) if (kernfs_type(kn) == KERNFS_DIR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6449) cgrp = kn->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6450) cgroup_get_live(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6451) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6452) cgrp = ERR_PTR(-ENOTDIR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6454) kernfs_put(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6455) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6456) cgrp = ERR_PTR(-ENOENT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6459) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6460) return cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6462) EXPORT_SYMBOL_GPL(cgroup_get_from_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6464) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6465) * cgroup_get_from_fd - get a cgroup pointer from a fd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6466) * @fd: fd obtained by open(cgroup2_dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6467) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6468) * Find the cgroup from a fd which should be obtained
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6469) * by opening a cgroup directory. Returns a pointer to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6470) * cgroup on success. ERR_PTR is returned if the cgroup
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6471) * cannot be found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6472) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6473) struct cgroup *cgroup_get_from_fd(int fd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6475) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6476) struct file *f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6478) f = fget_raw(fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6479) if (!f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6480) return ERR_PTR(-EBADF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6482) cgrp = cgroup_get_from_file(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6483) fput(f);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6484) return cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6485) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6486) EXPORT_SYMBOL_GPL(cgroup_get_from_fd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6488) static u64 power_of_ten(int power)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6490) u64 v = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6491) while (power--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6492) v *= 10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6493) return v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6496) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6497) * cgroup_parse_float - parse a floating number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6498) * @input: input string
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6499) * @dec_shift: number of decimal digits to shift
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6500) * @v: output
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6501) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6502) * Parse a decimal floating point number in @input and store the result in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6503) * @v with decimal point right shifted @dec_shift times. For example, if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6504) * @input is "12.3456" and @dec_shift is 3, *@v will be set to 12345.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6505) * Returns 0 on success, -errno otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6506) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6507) * There's nothing cgroup specific about this function except that it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6508) * currently the only user.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6509) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6510) int cgroup_parse_float(const char *input, unsigned dec_shift, s64 *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6511) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6512) s64 whole, frac = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6513) int fstart = 0, fend = 0, flen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6515) if (!sscanf(input, "%lld.%n%lld%n", &whole, &fstart, &frac, &fend))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6516) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6517) if (frac < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6518) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6520) flen = fend > fstart ? fend - fstart : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6521) if (flen < dec_shift)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6522) frac *= power_of_ten(dec_shift - flen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6523) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6524) frac = DIV_ROUND_CLOSEST_ULL(frac, power_of_ten(flen - dec_shift));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6526) *v = whole * power_of_ten(dec_shift) + frac;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6527) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6528) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6530) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6531) * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6532) * definition in cgroup-defs.h.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6533) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6534) #ifdef CONFIG_SOCK_CGROUP_DATA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6536) #if defined(CONFIG_CGROUP_NET_PRIO) || defined(CONFIG_CGROUP_NET_CLASSID)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6538) DEFINE_SPINLOCK(cgroup_sk_update_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6539) static bool cgroup_sk_alloc_disabled __read_mostly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6541) void cgroup_sk_alloc_disable(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6542) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6543) if (cgroup_sk_alloc_disabled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6544) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6545) pr_info("cgroup: disabling cgroup2 socket matching due to net_prio or net_cls activation\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6546) cgroup_sk_alloc_disabled = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6547) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6549) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6551) #define cgroup_sk_alloc_disabled false
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6553) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6554)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6555) void cgroup_sk_alloc(struct sock_cgroup_data *skcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6556) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6557) if (cgroup_sk_alloc_disabled) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6558) skcd->no_refcnt = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6559) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6562) /* Don't associate the sock with unrelated interrupted task's cgroup. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6563) if (in_interrupt())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6564) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6566) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6568) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6569) struct css_set *cset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6571) cset = task_css_set(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6572) if (likely(cgroup_tryget(cset->dfl_cgrp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6573) skcd->val = (unsigned long)cset->dfl_cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6574) cgroup_bpf_get(cset->dfl_cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6575) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6577) cpu_relax();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6578) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6580) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6583) void cgroup_sk_clone(struct sock_cgroup_data *skcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6584) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6585) if (skcd->val) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6586) if (skcd->no_refcnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6587) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6588) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6589) * We might be cloning a socket which is left in an empty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6590) * cgroup and the cgroup might have already been rmdir'd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6591) * Don't use cgroup_get_live().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6592) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6593) cgroup_get(sock_cgroup_ptr(skcd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6594) cgroup_bpf_get(sock_cgroup_ptr(skcd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6596) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6598) void cgroup_sk_free(struct sock_cgroup_data *skcd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6600) struct cgroup *cgrp = sock_cgroup_ptr(skcd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6602) if (skcd->no_refcnt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6603) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6604) cgroup_bpf_put(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6605) cgroup_put(cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6608) #endif /* CONFIG_SOCK_CGROUP_DATA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6610) #ifdef CONFIG_CGROUP_BPF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6611) int cgroup_bpf_attach(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6612) struct bpf_prog *prog, struct bpf_prog *replace_prog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6613) struct bpf_cgroup_link *link,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6614) enum bpf_attach_type type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6615) u32 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6616) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6617) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6619) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6620) ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, link, type, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6621) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6622) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6625) int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6626) enum bpf_attach_type type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6628) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6630) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6631) ret = __cgroup_bpf_detach(cgrp, prog, NULL, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6632) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6633) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6634) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6636) int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6637) union bpf_attr __user *uattr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6638) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6639) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6641) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6642) ret = __cgroup_bpf_query(cgrp, attr, uattr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6643) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6644) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6646) #endif /* CONFIG_CGROUP_BPF */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6648) #ifdef CONFIG_SYSFS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6649) static ssize_t show_delegatable_files(struct cftype *files, char *buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6650) ssize_t size, const char *prefix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6652) struct cftype *cft;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6653) ssize_t ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6655) for (cft = files; cft && cft->name[0] != '\0'; cft++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6656) if (!(cft->flags & CFTYPE_NS_DELEGATABLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6657) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6659) if ((cft->flags & CFTYPE_PRESSURE) && !cgroup_psi_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6660) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6662) if (prefix)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6663) ret += snprintf(buf + ret, size - ret, "%s.", prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6665) ret += snprintf(buf + ret, size - ret, "%s\n", cft->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6667) if (WARN_ON(ret >= size))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6668) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6669) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6671) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6674) static ssize_t delegate_show(struct kobject *kobj, struct kobj_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6675) char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6677) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6678) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6679) ssize_t ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6681) ret = show_delegatable_files(cgroup_base_files, buf, PAGE_SIZE - ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6682) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6684) for_each_subsys(ss, ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6685) ret += show_delegatable_files(ss->dfl_cftypes, buf + ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6686) PAGE_SIZE - ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6687) cgroup_subsys_name[ssid]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6689) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6691) static struct kobj_attribute cgroup_delegate_attr = __ATTR_RO(delegate);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6693) static ssize_t features_show(struct kobject *kobj, struct kobj_attribute *attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6694) char *buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6695) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6696) return snprintf(buf, PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6697) "nsdelegate\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6698) "memory_localevents\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6699) "memory_recursiveprot\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6700) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6701) static struct kobj_attribute cgroup_features_attr = __ATTR_RO(features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6703) static struct attribute *cgroup_sysfs_attrs[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6704) &cgroup_delegate_attr.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6705) &cgroup_features_attr.attr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6706) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6707) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6709) static const struct attribute_group cgroup_sysfs_attr_group = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6710) .attrs = cgroup_sysfs_attrs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6711) .name = "cgroup",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6712) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6714) static int __init cgroup_sysfs_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6715) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6716) return sysfs_create_group(kernel_kobj, &cgroup_sysfs_attr_group);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6718) subsys_initcall(cgroup_sysfs_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6720) #endif /* CONFIG_SYSFS */