^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #include "cgroup-internal.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) #include <linux/ctype.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include <linux/kmod.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/sort.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/delay.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/sched/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/sched/task.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/magic.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/vmalloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/delayacct.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/pid_namespace.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/cgroupstats.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <linux/fs_parser.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <trace/events/cgroup.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <trace/hooks/cgroup.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
/*
 * pidlists linger the following amount before being destroyed. The goal
 * is avoiding frequent destruction in the middle of consecutive read calls.
 * Expiring in the middle is a performance problem not a correctness one.
 * 1 sec should be enough.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #define CGROUP_PIDLIST_DESTROY_DELAY HZ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) /* Controllers blocked by the commandline in v1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) static u16 cgroup_no_v1_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) /* disable named v1 mounts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) static bool cgroup_no_v1_named;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * pidlist destructions need to be flushed on cgroup destruction. Use a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * separate workqueue as flush domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) static struct workqueue_struct *cgroup_pidlist_destroy_wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) /* protects cgroup_subsys->release_agent_path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) static DEFINE_SPINLOCK(release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) bool cgroup1_ssid_disabled(int ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) return cgroup_no_v1_mask & (1 << ssid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
/**
 * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from'
 * @from: attach to all cgroups of a given task
 * @tsk: the task to be attached
 *
 * Return: %0 on success or the first failing cgroup_attach_task() errno.
 */
int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk)
{
	struct cgroup_root *root;
	int retval = 0;

	mutex_lock(&cgroup_mutex);
	percpu_down_write(&cgroup_threadgroup_rwsem);
	for_each_root(root) {
		struct cgroup *from_cgrp;

		/* skip the default hierarchy; only v1 roots are walked */
		if (root == &cgrp_dfl_root)
			continue;

		/* css_set_lock guards task-to-cgroup lookups */
		spin_lock_irq(&css_set_lock);
		from_cgrp = task_cgroup_from_root(from, root);
		spin_unlock_irq(&css_set_lock);

		/* first failure aborts; earlier roots stay attached */
		retval = cgroup_attach_task(from_cgrp, tsk, false);
		if (retval)
			break;
	}
	percpu_up_write(&cgroup_threadgroup_rwsem);
	mutex_unlock(&cgroup_mutex);

	return retval;
}
EXPORT_SYMBOL_GPL(cgroup_attach_task_all);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82)
/**
 * cgroup_transfer_tasks - move tasks from one cgroup to another
 * @to: cgroup to which the tasks will be moved
 * @from: cgroup in which the tasks currently reside
 *
 * Locking rules between cgroup_post_fork() and the migration path
 * guarantee that, if a task is forking while being migrated, the new child
 * is guaranteed to be either visible in the source cgroup after the
 * parent's migration is complete or put into the target cgroup. No task
 * can slip out of migration through forking.
 *
 * Return: %0 on success or a negative errno code on failure.
 */
int cgroup_transfer_tasks(struct cgroup *to, struct cgroup *from)
{
	DEFINE_CGROUP_MGCTX(mgctx);
	struct cgrp_cset_link *link;
	struct css_task_iter it;
	struct task_struct *task;
	int ret;

	/* this is a v1-only interface; reject the default hierarchy */
	if (cgroup_on_dfl(to))
		return -EINVAL;

	ret = cgroup_migrate_vet_dst(to);
	if (ret)
		return ret;

	mutex_lock(&cgroup_mutex);

	percpu_down_write(&cgroup_threadgroup_rwsem);

	/* all tasks in @from are being moved, all csets are source */
	spin_lock_irq(&css_set_lock);
	list_for_each_entry(link, &from->cset_links, cset_link)
		cgroup_migrate_add_src(link->cset, to, &mgctx);
	spin_unlock_irq(&css_set_lock);

	ret = cgroup_migrate_prepare_dst(&mgctx);
	if (ret)
		goto out_err;

	/*
	 * Migrate tasks one-by-one until @from is empty. This fails iff
	 * ->can_attach() fails.
	 */
	do {
		css_task_iter_start(&from->self, 0, &it);

		/* skip tasks that are already exiting */
		do {
			task = css_task_iter_next(&it);
		} while (task && (task->flags & PF_EXITING));

		/* pin the task so it outlives the iterator */
		if (task)
			get_task_struct(task);
		css_task_iter_end(&it);

		if (task) {
			ret = cgroup_migrate(task, false, &mgctx);
			if (!ret)
				TRACE_CGROUP_PATH(transfer_tasks, to, task, false);
			put_task_struct(task);
		}
	} while (task && !ret);
out_err:
	cgroup_migrate_finish(&mgctx);
	percpu_up_write(&cgroup_threadgroup_rwsem);
	mutex_unlock(&cgroup_mutex);
	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * Stuff for reading the 'tasks'/'procs' files.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) * Reading this file can return large amounts of data if a cgroup has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) * *lots* of attached tasks. So it may need several calls to read(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * but we cannot guarantee that the information we produce is correct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) * unless we produce it entirely atomically.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161)
/* which pidlist file are we talking about? */
enum cgroup_filetype {
	CGROUP_FILE_PROCS,	/* procs file: tgids, de-duplicated */
	CGROUP_FILE_TASKS,	/* tasks file: per-thread pids */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
/*
 * A pidlist is a list of pids that virtually represents the contents of one
 * of the cgroup files ("procs" or "tasks"). We keep a list of such pidlists,
 * a pair (one each for procs, tasks) for each pid namespace that's relevant
 * to the cgroup.
 */
struct cgroup_pidlist {
	/*
	 * used to find which pidlist is wanted. doesn't change as long as
	 * this particular list stays in the list.
	 */
	struct { enum cgroup_filetype type; struct pid_namespace *ns; } key;
	/* sorted array of pids (tgids for procs; see pidlist_array_load()) */
	pid_t *list;
	/* how many elements the above list has */
	int length;
	/* each of these stored in a list by its cgroup */
	struct list_head links;
	/* pointer to the cgroup we belong to, for list removal purposes */
	struct cgroup *owner;
	/* for delayed destruction; see cgroup_pidlist_destroy_work_fn() */
	struct delayed_work destroy_dwork;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191)
/*
 * Used to destroy all pidlists lingering waiting for destroy timer. None
 * should be left afterwards.
 */
void cgroup1_pidlist_destroy_all(struct cgroup *cgrp)
{
	struct cgroup_pidlist *l, *tmp_l;

	mutex_lock(&cgrp->pidlist_mutex);
	/* expedite every pending destruction work item: delay of 0 */
	list_for_each_entry_safe(l, tmp_l, &cgrp->pidlists, links)
		mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork, 0);
	mutex_unlock(&cgrp->pidlist_mutex);

	/* wait for the work items; each unlinks and frees its pidlist */
	flush_workqueue(cgroup_pidlist_destroy_wq);
	BUG_ON(!list_empty(&cgrp->pidlists));
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)
/* Delayed-work callback that tears down a pidlist whose linger delay expired. */
static void cgroup_pidlist_destroy_work_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct cgroup_pidlist *l = container_of(dwork, struct cgroup_pidlist,
						destroy_dwork);
	struct cgroup_pidlist *tofree = NULL;

	mutex_lock(&l->owner->pidlist_mutex);

	/*
	 * Destroy iff we didn't get queued again. The state won't change
	 * as destroy_dwork can only be queued while locked.
	 */
	if (!delayed_work_pending(dwork)) {
		list_del(&l->links);
		kvfree(l->list);
		put_pid_ns(l->key.ns);
		tofree = l;
	}

	/* drop the mutex (it lives in l->owner, not in @l) before freeing @l */
	mutex_unlock(&l->owner->pidlist_mutex);
	kfree(tofree);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
/*
 * pidlist_uniq - strip duplicate entries from a sorted pid array, in place.
 * Returns the number of unique elements remaining in @list.
 */
static int pidlist_uniq(pid_t *list, int length)
{
	int read_idx, write_idx;

	/* zero- and one-element arrays are trivially duplicate-free */
	if (length < 2)
		return length;

	/*
	 * Classic in-place compaction: @write_idx trails @read_idx and
	 * receives each element that differs from its predecessor.
	 */
	write_idx = 1;
	for (read_idx = 1; read_idx < length; read_idx++) {
		if (list[read_idx] != list[read_idx - 1])
			list[write_idx++] = list[read_idx];
	}

	return write_idx;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262)
/*
 * The two pid files - task and cgroup.procs - guaranteed that the result
 * is sorted, which forced this whole pidlist fiasco. As pid order is
 * different per namespace, each namespace needs differently sorted list,
 * making it impossible to use, for example, single rbtree of member tasks
 * sorted by task pointer. As pidlists can be fairly large, allocating one
 * per open file is dangerous, so cgroup had to implement shared pool of
 * pidlists keyed by cgroup and namespace.
 */

/*
 * sort() comparator for pid_t values.  Uses an explicit three-way
 * comparison instead of "a - b": the subtraction could overflow (undefined
 * behavior) if the two pids ever spanned more than the positive range of
 * pid_t, and sort() only cares about the sign anyway.
 */
static int cmppid(const void *a, const void *b)
{
	pid_t pid_a = *(const pid_t *)a;
	pid_t pid_b = *(const pid_t *)b;

	return (pid_a > pid_b) - (pid_a < pid_b);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) static struct cgroup_pidlist *cgroup_pidlist_find(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) enum cgroup_filetype type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) struct cgroup_pidlist *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) /* don't need task_nsproxy() if we're looking at ourself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) struct pid_namespace *ns = task_active_pid_ns(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) lockdep_assert_held(&cgrp->pidlist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) list_for_each_entry(l, &cgrp->pidlists, links)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) if (l->key.type == type && l->key.ns == ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) return l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) * find the appropriate pidlist for our purpose (given procs vs tasks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) * returns with the lock on that pidlist already held, and takes care
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) * of the use count, or returns NULL with no locks held if we're out of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) * memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) static struct cgroup_pidlist *cgroup_pidlist_find_create(struct cgroup *cgrp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) enum cgroup_filetype type)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) struct cgroup_pidlist *l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) lockdep_assert_held(&cgrp->pidlist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) l = cgroup_pidlist_find(cgrp, type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) if (l)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) return l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) /* entry not found; create a new one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) l = kzalloc(sizeof(struct cgroup_pidlist), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) if (!l)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) return l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) INIT_DELAYED_WORK(&l->destroy_dwork, cgroup_pidlist_destroy_work_fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) l->key.type = type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) /* don't need task_nsproxy() if we're looking at ourself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) l->key.ns = get_pid_ns(task_active_pid_ns(current));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) l->owner = cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) list_add(&l->links, &cgrp->pidlists);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) return l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322)
/*
 * Load a cgroup's pidarray with either procs' tgids or tasks' pids
 *
 * Builds a sorted (and, for procs, de-duplicated) pid array for @type and
 * installs it in the shared pidlist, returned through @lp.  Returns 0 on
 * success or -ENOMEM.  Caller must hold cgrp->pidlist_mutex.
 */
static int pidlist_array_load(struct cgroup *cgrp, enum cgroup_filetype type,
			      struct cgroup_pidlist **lp)
{
	pid_t *array;
	int length;
	int pid, n = 0; /* used for populating the array */
	struct css_task_iter it;
	struct task_struct *tsk;
	struct cgroup_pidlist *l;

	lockdep_assert_held(&cgrp->pidlist_mutex);

	/*
	 * If cgroup gets more users after we read count, we won't have
	 * enough space - tough. This race is indistinguishable to the
	 * caller from the case that the additional cgroup users didn't
	 * show up until sometime later on.
	 */
	length = cgroup_task_count(cgrp);
	array = kvmalloc_array(length, sizeof(pid_t), GFP_KERNEL);
	if (!array)
		return -ENOMEM;
	/* now, populate the array */
	css_task_iter_start(&cgrp->self, 0, &it);
	while ((tsk = css_task_iter_next(&it))) {
		/* don't write past the allocation if tasks appeared since counting */
		if (unlikely(n == length))
			break;
		/* get tgid or pid for procs or tasks file respectively */
		if (type == CGROUP_FILE_PROCS)
			pid = task_tgid_vnr(tsk);
		else
			pid = task_pid_vnr(tsk);
		if (pid > 0) /* make sure to only use valid results */
			array[n++] = pid;
	}
	css_task_iter_end(&it);
	length = n;
	/* now sort & (if procs) strip out duplicates */
	sort(array, length, sizeof(pid_t), cmppid, NULL);
	if (type == CGROUP_FILE_PROCS)
		length = pidlist_uniq(array, length);

	/* find or create the shared pidlist that will own this array */
	l = cgroup_pidlist_find_create(cgrp, type);
	if (!l) {
		kvfree(array);
		return -ENOMEM;
	}

	/* store array, freeing old if necessary */
	kvfree(l->list);
	l->list = array;
	l->length = length;
	*lp = l;
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381)
/*
 * seq_file methods for the tasks/procs files. The seq_file position is the
 * next pid to display; the seq_file iterator is a pointer to the pid
 * in the cgroup->l->list array.
 */

static void *cgroup_pidlist_start(struct seq_file *s, loff_t *pos)
{
	/*
	 * Initially we receive a position value that corresponds to
	 * one more than the last pid shown (or 0 on the first call or
	 * after a seek to the start). Use a binary-search to find the
	 * next pid to display, if any
	 */
	struct kernfs_open_file *of = s->private;
	struct cgroup_file_ctx *ctx = of->priv;
	struct cgroup *cgrp = seq_css(s)->cgroup;
	struct cgroup_pidlist *l;
	enum cgroup_filetype type = seq_cft(s)->private;
	int index = 0, pid = *pos;
	int *iter, ret;

	/*
	 * NOTE(review): the mutex is released in cgroup_pidlist_stop();
	 * the ERR_PTR return below relies on seq_file invoking ->stop()
	 * even when ->start() fails -- confirm against seq_read().
	 */
	mutex_lock(&cgrp->pidlist_mutex);

	/*
	 * !NULL @ctx->procs1.pidlist indicates that this isn't the first
	 * start() after open. If the matching pidlist is around, we can use
	 * that. Look for it. Note that @ctx->procs1.pidlist can't be used
	 * directly. It could already have been destroyed.
	 */
	if (ctx->procs1.pidlist)
		ctx->procs1.pidlist = cgroup_pidlist_find(cgrp, type);

	/*
	 * Either this is the first start() after open or the matching
	 * pidlist has been destroyed inbetween. Create a new one.
	 */
	if (!ctx->procs1.pidlist) {
		ret = pidlist_array_load(cgrp, type, &ctx->procs1.pidlist);
		if (ret)
			return ERR_PTR(ret);
	}
	l = ctx->procs1.pidlist;

	if (pid) {
		/* binary-search for the first entry >= the resume pid */
		int end = l->length;

		while (index < end) {
			int mid = (index + end) / 2;
			if (l->list[mid] == pid) {
				index = mid;
				break;
			} else if (l->list[mid] <= pid)
				index = mid + 1;
			else
				end = mid;
		}
	}
	/* If we're off the end of the array, we're done */
	if (index >= l->length)
		return NULL;
	/* Update the abstract position to be the actual pid that we found */
	iter = l->list + index;
	*pos = *iter;
	return iter;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) static void cgroup_pidlist_stop(struct seq_file *s, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) struct kernfs_open_file *of = s->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) struct cgroup_file_ctx *ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) struct cgroup_pidlist *l = ctx->procs1.pidlist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) if (l)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) mod_delayed_work(cgroup_pidlist_destroy_wq, &l->destroy_dwork,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) CGROUP_PIDLIST_DESTROY_DELAY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) mutex_unlock(&seq_css(s)->cgroup->pidlist_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) struct kernfs_open_file *of = s->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) struct cgroup_file_ctx *ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) struct cgroup_pidlist *l = ctx->procs1.pidlist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) pid_t *p = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) pid_t *end = l->list + l->length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) * Advance to the next pid in the array. If this goes off the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) * end, we're done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) p++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) if (p >= end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) (*pos)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) *pos = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) return p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) static int cgroup_pidlist_show(struct seq_file *s, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) seq_printf(s, "%d\n", *(int *)v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) static ssize_t __cgroup1_procs_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) char *buf, size_t nbytes, loff_t off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) bool threadgroup)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) struct task_struct *task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) const struct cred *cred, *tcred;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) bool locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) cgrp = cgroup_kn_lock_live(of->kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) task = cgroup_procs_write_start(buf, threadgroup, &locked, cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) ret = PTR_ERR_OR_ZERO(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) * Even if we're attaching all tasks in the thread group, we only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * need to check permissions on one of them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) cred = current_cred();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) tcred = get_task_cred(task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) !uid_eq(cred->euid, tcred->uid) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) !uid_eq(cred->euid, tcred->suid) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) !ns_capable(tcred->user_ns, CAP_SYS_NICE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) ret = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) put_cred(tcred);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) goto out_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) ret = cgroup_attach_task(cgrp, task, threadgroup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) trace_android_vh_cgroup_set_task(ret, task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) out_finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) cgroup_procs_write_finish(task, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) return ret ?: nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) static ssize_t cgroup1_procs_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) return __cgroup1_procs_write(of, buf, nbytes, off, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) static ssize_t cgroup1_tasks_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) return __cgroup1_procs_write(of, buf, nbytes, off, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) static ssize_t cgroup_release_agent_write(struct kernfs_open_file *of,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) char *buf, size_t nbytes, loff_t off)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) struct cgroup_file_ctx *ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) * Release agent gets called with all capabilities,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) * require capabilities to set release agent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) ctx = of->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) if ((ctx->ns->user_ns != &init_user_ns) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) !file_ns_capable(of->file, &init_user_ns, CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) cgrp = cgroup_kn_lock_live(of->kn, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) if (!cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) return -ENODEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) spin_lock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) strlcpy(cgrp->root->release_agent_path, strstrip(buf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) sizeof(cgrp->root->release_agent_path));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) spin_unlock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) cgroup_kn_unlock(of->kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) return nbytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) static int cgroup_release_agent_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) struct cgroup *cgrp = seq_css(seq)->cgroup;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) spin_lock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) seq_puts(seq, cgrp->root->release_agent_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) spin_unlock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) seq_putc(seq, '\n');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) static int cgroup_sane_behavior_show(struct seq_file *seq, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) seq_puts(seq, "0\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) static u64 cgroup_read_notify_on_release(struct cgroup_subsys_state *css,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) struct cftype *cft)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) return notify_on_release(css->cgroup);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) static int cgroup_write_notify_on_release(struct cgroup_subsys_state *css,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) struct cftype *cft, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) if (val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) set_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) clear_bit(CGRP_NOTIFY_ON_RELEASE, &css->cgroup->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) static u64 cgroup_clone_children_read(struct cgroup_subsys_state *css,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) struct cftype *cft)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) return test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) static int cgroup_clone_children_write(struct cgroup_subsys_state *css,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) struct cftype *cft, u64 val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) if (val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) set_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) clear_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) /* cgroup core interface files for the legacy hierarchies */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) struct cftype cgroup1_base_files[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) .name = "cgroup.procs",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) .seq_start = cgroup_pidlist_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) .seq_next = cgroup_pidlist_next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) .seq_stop = cgroup_pidlist_stop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) .seq_show = cgroup_pidlist_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) .private = CGROUP_FILE_PROCS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) .write = cgroup1_procs_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) .name = "cgroup.clone_children",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) .read_u64 = cgroup_clone_children_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) .write_u64 = cgroup_clone_children_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) .name = "cgroup.sane_behavior",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) .flags = CFTYPE_ONLY_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) .seq_show = cgroup_sane_behavior_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) .name = "tasks",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) .seq_start = cgroup_pidlist_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) .seq_next = cgroup_pidlist_next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) .seq_stop = cgroup_pidlist_stop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) .seq_show = cgroup_pidlist_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) .private = CGROUP_FILE_TASKS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) .write = cgroup1_tasks_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) .name = "notify_on_release",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) .read_u64 = cgroup_read_notify_on_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) .write_u64 = cgroup_write_notify_on_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) .name = "release_agent",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) .flags = CFTYPE_ONLY_ON_ROOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) .seq_show = cgroup_release_agent_show,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) .write = cgroup_release_agent_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) .max_write_len = PATH_MAX - 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) { } /* terminate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) /* Display information about each subsystem and each hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) int proc_cgroupstats_show(struct seq_file *m, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) seq_puts(m, "#subsys_name\thierarchy\tnum_cgroups\tenabled\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) * ideally we don't want subsystems moving around while we do this.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) * cgroup_mutex is also necessary to guarantee an atomic snapshot of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) * subsys/hierarchy state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) for_each_subsys(ss, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) seq_printf(m, "%s\t%d\t%d\t%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) ss->legacy_name, ss->root->hierarchy_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) atomic_read(&ss->root->nr_cgrps),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) cgroup_ssid_enabled(i));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) * cgroupstats_build - build and fill cgroupstats
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) * @stats: cgroupstats to fill information into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * @dentry: A dentry entry belonging to the cgroup for which stats have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * been requested.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) * Build and fill cgroupstats so that taskstats can export it to user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) * space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) int cgroupstats_build(struct cgroupstats *stats, struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) struct kernfs_node *kn = kernfs_node_from_dentry(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) struct cgroup *cgrp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) struct css_task_iter it;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) /* it should be kernfs_node belonging to cgroupfs and is a directory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) if (dentry->d_sb->s_type != &cgroup_fs_type || !kn ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) kernfs_type(kn) != KERNFS_DIR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) * We aren't being called from kernfs and there's no guarantee on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) * @kn->priv's validity. For this and css_tryget_online_from_dir(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) * @kn->priv is RCU safe. Let's do the RCU dancing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) cgrp = rcu_dereference(*(void __rcu __force **)&kn->priv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) if (!cgrp || cgroup_is_dead(cgrp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) css_task_iter_start(&cgrp->self, 0, &it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) while ((tsk = css_task_iter_next(&it))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) switch (tsk->state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) case TASK_RUNNING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) stats->nr_running++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) case TASK_INTERRUPTIBLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) stats->nr_sleeping++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) case TASK_UNINTERRUPTIBLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) stats->nr_uninterruptible++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) case TASK_STOPPED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) stats->nr_stopped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) if (delayacct_is_task_waiting_on_io(tsk))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) stats->nr_io_wait++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) css_task_iter_end(&it);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) void cgroup1_check_for_release(struct cgroup *cgrp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) if (notify_on_release(cgrp) && !cgroup_is_populated(cgrp) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) !css_has_online_children(&cgrp->self) && !cgroup_is_dead(cgrp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) schedule_work(&cgrp->release_agent_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) * Notify userspace when a cgroup is released, by running the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) * configured release agent with the name of the cgroup (path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) * relative to the root of cgroup file system) as the argument.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) * Most likely, this user command will try to rmdir this cgroup.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) * This races with the possibility that some other task will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) * attached to this cgroup before it is removed, or that some other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * user task will 'mkdir' a child cgroup of this cgroup. That's ok.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) * The presumed 'rmdir' will fail quietly if this cgroup is no longer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) * unused, and this cgroup will be reprieved from its death sentence,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) * to continue to serve a useful existence. Next time it's released,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) * we will get notified again, if it still has 'notify_on_release' set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * The final arg to call_usermodehelper() is UMH_WAIT_EXEC, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * means only wait until the task is successfully execve()'d. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * separate release agent task is forked by call_usermodehelper(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) * then control in this thread returns here, without waiting for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * release agent task. We don't bother to wait because the caller of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * this routine has no use for the exit status of the release agent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) * task, so no sense holding our caller up for that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) void cgroup1_release_agent(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) struct cgroup *cgrp =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) container_of(work, struct cgroup, release_agent_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) char *pathbuf, *agentbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) char *argv[3], *envp[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) /* snoop agent path and exit early if empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) if (!cgrp->root->release_agent_path[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) /* prepare argument buffers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) agentbuf = kmalloc(PATH_MAX, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) if (!pathbuf || !agentbuf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) spin_lock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) strlcpy(agentbuf, cgrp->root->release_agent_path, PATH_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) spin_unlock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) if (!agentbuf[0])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) ret = cgroup_path_ns(cgrp, pathbuf, PATH_MAX, &init_cgroup_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) if (ret < 0 || ret >= PATH_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) argv[0] = agentbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) argv[1] = pathbuf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) argv[2] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) /* minimal command environment */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) envp[0] = "HOME=/";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) envp[1] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) envp[2] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) kfree(agentbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) kfree(pathbuf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) * cgroup_rename - Only allow simple rename of directories in place.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) static int cgroup1_rename(struct kernfs_node *kn, struct kernfs_node *new_parent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) const char *new_name_str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) struct cgroup *cgrp = kn->priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) /* do not accept '\n' to prevent making /proc/<pid>/cgroup unparsable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) if (strchr(new_name_str, '\n'))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) if (kernfs_type(kn) != KERNFS_DIR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) return -ENOTDIR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) if (kn->parent != new_parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) * We're gonna grab cgroup_mutex which nests outside kernfs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) * active_ref. kernfs_rename() doesn't require active_ref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) * protection. Break them before grabbing cgroup_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) kernfs_break_active_protection(new_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) kernfs_break_active_protection(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) mutex_lock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) ret = kernfs_rename(kn, new_parent, new_name_str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) TRACE_CGROUP_PATH(rename, cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) kernfs_unbreak_active_protection(kn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) kernfs_unbreak_active_protection(new_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) static int cgroup1_show_options(struct seq_file *seq, struct kernfs_root *kf_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) struct cgroup_root *root = cgroup_root_from_kf(kf_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) int ssid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) for_each_subsys(ss, ssid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) if (root->subsys_mask & (1 << ssid))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) seq_show_option(seq, ss->legacy_name, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) if (root->flags & CGRP_ROOT_NOPREFIX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) seq_puts(seq, ",noprefix");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) if (root->flags & CGRP_ROOT_XATTR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) seq_puts(seq, ",xattr");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) if (root->flags & CGRP_ROOT_CPUSET_V2_MODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) seq_puts(seq, ",cpuset_v2_mode");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) spin_lock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) if (strlen(root->release_agent_path))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) seq_show_option(seq, "release_agent",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) root->release_agent_path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) spin_unlock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) seq_puts(seq, ",clone_children");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) if (strlen(root->name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) seq_show_option(seq, "name", root->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) enum cgroup1_param {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) Opt_all,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) Opt_clone_children,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) Opt_cpuset_v2_mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) Opt_name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) Opt_none,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) Opt_noprefix,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) Opt_release_agent,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) Opt_xattr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) const struct fs_parameter_spec cgroup1_fs_parameters[] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) fsparam_flag ("all", Opt_all),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) fsparam_flag ("clone_children", Opt_clone_children),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) fsparam_flag ("cpuset_v2_mode", Opt_cpuset_v2_mode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) fsparam_string("name", Opt_name),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) fsparam_flag ("none", Opt_none),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) fsparam_flag ("noprefix", Opt_noprefix),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) fsparam_string("release_agent", Opt_release_agent),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) fsparam_flag ("xattr", Opt_xattr),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) {}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) struct fs_parse_result result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) int opt, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) opt = fs_parse(fc, cgroup1_fs_parameters, param, &result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) if (opt == -ENOPARAM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (strcmp(param->key, "source") == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) if (param->type != fs_value_is_string)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) return invalf(fc, "Non-string source");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) if (fc->source)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) return invalf(fc, "Multiple sources not supported");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) fc->source = param->string;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) param->string = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) for_each_subsys(ss, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) if (strcmp(param->key, ss->legacy_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) if (!cgroup_ssid_enabled(i) || cgroup1_ssid_disabled(i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) return invalfc(fc, "Disabled controller '%s'",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) param->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) ctx->subsys_mask |= (1 << i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) return invalfc(fc, "Unknown subsys name '%s'", param->key);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) if (opt < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) return opt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) switch (opt) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) case Opt_none:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) /* Explicitly have no subsystems */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) ctx->none = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) case Opt_all:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) ctx->all_ss = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) case Opt_noprefix:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) ctx->flags |= CGRP_ROOT_NOPREFIX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) case Opt_clone_children:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) ctx->cpuset_clone_children = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) case Opt_cpuset_v2_mode:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) ctx->flags |= CGRP_ROOT_CPUSET_V2_MODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) case Opt_xattr:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) ctx->flags |= CGRP_ROOT_XATTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) case Opt_release_agent:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) /* Specifying two release agents is forbidden */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) if (ctx->release_agent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) return invalfc(fc, "release_agent respecified");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) * Release agent gets called with all capabilities,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) * require capabilities to set release agent.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) if ((fc->user_ns != &init_user_ns) || !capable(CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) return invalfc(fc, "Setting release_agent not allowed");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) ctx->release_agent = param->string;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) param->string = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) case Opt_name:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) /* blocked by boot param? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) if (cgroup_no_v1_named)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) /* Can't specify an empty name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (!param->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) return invalfc(fc, "Empty name");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) if (param->size > MAX_CGROUP_ROOT_NAMELEN - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) return invalfc(fc, "Name too long");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) /* Must match [\w.-]+ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) for (i = 0; i < param->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) char c = param->string[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) if (isalnum(c))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) if ((c == '.') || (c == '-') || (c == '_'))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) return invalfc(fc, "Invalid name");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) /* Specifying two names is forbidden */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) if (ctx->name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) return invalfc(fc, "name respecified");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) ctx->name = param->string;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) param->string = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) static int check_cgroupfs_options(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) u16 mask = U16_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) u16 enabled = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) #ifdef CONFIG_CPUSETS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) mask = ~((u16)1 << cpuset_cgrp_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) for_each_subsys(ss, i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) if (cgroup_ssid_enabled(i) && !cgroup1_ssid_disabled(i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) enabled |= 1 << i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) ctx->subsys_mask &= enabled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * In absense of 'none', 'name=' or subsystem name options,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * let's default to 'all'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) if (!ctx->subsys_mask && !ctx->none && !ctx->name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) ctx->all_ss = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) if (ctx->all_ss) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) /* Mutually exclusive option 'all' + subsystem name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) if (ctx->subsys_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) return invalfc(fc, "subsys name conflicts with all");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) /* 'all' => select all the subsystems */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) ctx->subsys_mask = enabled;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * We either have to specify by name or by subsystems. (So all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * empty hierarchies must have a name).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) if (!ctx->subsys_mask && !ctx->name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) return invalfc(fc, "Need name or subsystem set");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) * Option noprefix was introduced just for backward compatibility
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) * with the old cpuset, so we allow noprefix only if mounting just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) * the cpuset subsystem.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) if ((ctx->flags & CGRP_ROOT_NOPREFIX) && (ctx->subsys_mask & mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) return invalfc(fc, "noprefix used incorrectly");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) /* Can't specify "none" and some subsystems */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) if (ctx->subsys_mask && ctx->none)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) return invalfc(fc, "none used incorrectly");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) int cgroup1_reconfigure(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) struct kernfs_root *kf_root = kernfs_root_from_sb(fc->root->d_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) struct cgroup_root *root = cgroup_root_from_kf(kf_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) u16 added_mask, removed_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) /* See what subsystems are wanted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) ret = check_cgroupfs_options(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) if (ctx->subsys_mask != root->subsys_mask || ctx->release_agent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) pr_warn("option changes via remount are deprecated (pid=%d comm=%s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) task_tgid_nr(current), current->comm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) added_mask = ctx->subsys_mask & ~root->subsys_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) removed_mask = root->subsys_mask & ~ctx->subsys_mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) /* Don't allow flags or name to change at remount */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) if ((ctx->flags ^ root->flags) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) (ctx->name && strcmp(ctx->name, root->name))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) errorfc(fc, "option or name mismatch, new: 0x%x \"%s\", old: 0x%x \"%s\"",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) ctx->flags, ctx->name ?: "", root->flags, root->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) /* remounting is not allowed for populated hierarchies */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (!list_empty(&root->cgrp.self.children)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) ret = -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) ret = rebind_subsystems(root, added_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) WARN_ON(rebind_subsystems(&cgrp_dfl_root, removed_mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) if (ctx->release_agent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) spin_lock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) strcpy(root->release_agent_path, ctx->release_agent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) spin_unlock(&release_agent_path_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) trace_cgroup_remount(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) struct kernfs_syscall_ops cgroup1_kf_syscall_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) .rename = cgroup1_rename,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) .show_options = cgroup1_show_options,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) .mkdir = cgroup_mkdir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) .rmdir = cgroup_rmdir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) .show_path = cgroup_show_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) * The guts of cgroup1 mount - find or create cgroup_root to use.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) * Called with cgroup_mutex held; returns 0 on success, -E... on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) * error and positive - in case when the candidate is busy dying.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) * On success it stashes a reference to cgroup_root into given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * cgroup_fs_context; that reference is *NOT* counting towards the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * cgroup_root refcount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) static int cgroup1_root_to_use(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) struct cgroup_root *root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) int i, ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) /* First find the desired set of subsystems */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) ret = check_cgroupfs_options(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) * Destruction of cgroup root is asynchronous, so subsystems may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) * still be dying after the previous unmount. Let's drain the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) * dying subsystems. We just need to ensure that the ones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) * unmounted previously finish dying and don't care about new ones
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) * starting. Testing ref liveliness is good enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) for_each_subsys(ss, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) if (!(ctx->subsys_mask & (1 << i)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) ss->root == &cgrp_dfl_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) if (!percpu_ref_tryget_live(&ss->root->cgrp.self.refcnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) return 1; /* restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) cgroup_put(&ss->root->cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) for_each_root(root) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) bool name_match = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) if (root == &cgrp_dfl_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) * If we asked for a name then it must match. Also, if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) * name matches but sybsys_mask doesn't, we should fail.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) * Remember whether name matched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) if (ctx->name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) if (strcmp(ctx->name, root->name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) name_match = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * If we asked for subsystems (or explicitly for no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) * subsystems) then they must match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) if ((ctx->subsys_mask || ctx->none) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) (ctx->subsys_mask != root->subsys_mask)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) if (!name_match)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) if (root->flags ^ ctx->flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) pr_warn("new mount options do not match the existing superblock, will be ignored\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) ctx->root = root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * No such thing, create a new one. name= matching without subsys
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * specification is allowed for already existing hierarchies but we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) * can't create new one without subsys specification.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) if (!ctx->subsys_mask && !ctx->none)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) return invalfc(fc, "No subsys list or none specified");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) /* Hierarchies may only be created in the initial cgroup namespace. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) if (ctx->ns != &init_cgroup_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) root = kzalloc(sizeof(*root), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) if (!root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) ctx->root = root;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) init_cgroup_root(ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) ret = cgroup_setup_root(root, ctx->subsys_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) cgroup_free_root(root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) int cgroup1_get_tree(struct fs_context *fc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) struct cgroup_fs_context *ctx = cgroup_fc2context(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) /* Check if the caller has permission to mount. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) if (!ns_capable(ctx->ns->user_ns, CAP_SYS_ADMIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) ret = cgroup1_root_to_use(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) if (!ret && !percpu_ref_tryget_live(&ctx->root->cgrp.self.refcnt))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) ret = 1; /* restart */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) mutex_unlock(&cgroup_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) ret = cgroup_do_get_tree(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) if (!ret && percpu_ref_is_dying(&ctx->root->cgrp.self.refcnt)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) fc_drop_locked(fc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) if (unlikely(ret > 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) msleep(10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) return restart_syscall();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) static int __init cgroup1_wq_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) * Used to destroy pidlists and separate to serve as flush domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) * Cap @max_active to 1 too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) cgroup_pidlist_destroy_wq = alloc_workqueue("cgroup_pidlist_destroy",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 0, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) BUG_ON(!cgroup_pidlist_destroy_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) core_initcall(cgroup1_wq_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) static int __init cgroup_no_v1(char *str)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) struct cgroup_subsys *ss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) char *token;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) while ((token = strsep(&str, ",")) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) if (!*token)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (!strcmp(token, "all")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) cgroup_no_v1_mask = U16_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) if (!strcmp(token, "named")) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) cgroup_no_v1_named = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) for_each_subsys(ss, i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) if (strcmp(token, ss->name) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) strcmp(token, ss->legacy_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) cgroup_no_v1_mask |= 1 << i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) __setup("cgroup_no_v1=", cgroup_no_v1);