// SPDX-License-Identifier: GPL-2.0
/*
 * Basic worker thread pool for io_uring
 *
 * Copyright (C) 2019 Jens Axboe
 *
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/sched/signal.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/rculist_nulls.h>
#include <linux/fs_struct.h>
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>
#include <linux/cpu.h>

#include "../kernel/sched/sched.h"
#include "io-wq.h"

#define WORKER_IDLE_TIMEOUT	(5 * HZ)

enum {
	IO_WORKER_F_UP		= 1,	/* up and active */
	IO_WORKER_F_RUNNING	= 2,	/* account as running */
	IO_WORKER_F_FREE	= 4,	/* worker on free list */
	IO_WORKER_F_FIXED	= 8,	/* static idle worker */
	IO_WORKER_F_BOUND	= 16,	/* is doing bounded work */
};

enum {
	IO_WQ_BIT_EXIT		= 0,	/* wq exiting */
	IO_WQ_BIT_CANCEL	= 1,	/* cancel work on list */
	IO_WQ_BIT_ERROR		= 2,	/* error on setup */
};

enum {
	IO_WQE_FLAG_STALLED	= 1,	/* stalled on hash */
};

/*
 * One for each thread in a wqe pool
 */
struct io_worker {
	refcount_t ref;
	unsigned flags;
	struct hlist_nulls_node nulls_node;
	struct list_head all_list;
	struct task_struct *task;
	struct io_wqe *wqe;

	struct io_wq_work *cur_work;
	spinlock_t lock;

	struct rcu_head rcu;
	struct mm_struct *mm;
#ifdef CONFIG_BLK_CGROUP
	struct cgroup_subsys_state *blkcg_css;
#endif
	const struct cred *cur_creds;
	const struct cred *saved_creds;
	struct files_struct *restore_files;
	struct nsproxy *restore_nsproxy;
	struct fs_struct *restore_fs;
};

#if BITS_PER_LONG == 64
#define IO_WQ_HASH_ORDER	6
#else
#define IO_WQ_HASH_ORDER	5
#endif

#define IO_WQ_NR_HASH_BUCKETS	(1u << IO_WQ_HASH_ORDER)

struct io_wqe_acct {
	unsigned nr_workers;
	unsigned max_workers;
	atomic_t nr_running;
};

enum {
	IO_WQ_ACCT_BOUND,
	IO_WQ_ACCT_UNBOUND,
};

/*
 * Per-node worker thread pool
 */
struct io_wqe {
	struct {
		raw_spinlock_t lock;
		struct io_wq_work_list work_list;
		unsigned long hash_map;
		unsigned flags;
	} ____cacheline_aligned_in_smp;

	int node;
	struct io_wqe_acct acct[2];

	struct hlist_nulls_head free_list;
	struct list_head all_list;

	struct io_wq *wq;
	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
};

/*
 * Per io_wq state
 */
struct io_wq {
	struct io_wqe **wqes;
	unsigned long state;

	free_work_fn *free_work;
	io_wq_work_fn *do_work;

	struct task_struct *manager;
	struct user_struct *user;
	refcount_t refs;
	struct completion done;

	struct hlist_node cpuhp_node;

	refcount_t use_refs;
};

static enum cpuhp_state io_wq_online;

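/*
 * Worker reference helpers: io_worker_get() takes a reference unless the
 * worker is already exiting, and io_worker_release() drops it, waking the
 * exiting worker task once the last reference goes away.
 */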
static bool io_worker_get(struct io_worker *worker)
{
	return refcount_inc_not_zero(&worker->ref);
}

static void io_worker_release(struct io_worker *worker)
{
	if (refcount_dec_and_test(&worker->ref))
		wake_up_process(worker->task);
}

/*
 * Note: drops the wqe->lock if returning true! The caller must re-acquire
 * the lock in that case. Some callers need to restart handling if this
 * happens, so we can't just re-acquire the lock on behalf of the caller.
 */
static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
{
	bool dropped_lock = false;

	if (worker->saved_creds) {
		revert_creds(worker->saved_creds);
		worker->cur_creds = worker->saved_creds = NULL;
	}

	if (current->files != worker->restore_files) {
		__acquire(&wqe->lock);
		raw_spin_unlock_irq(&wqe->lock);
		dropped_lock = true;

		task_lock(current);
		current->files = worker->restore_files;
		current->nsproxy = worker->restore_nsproxy;
		task_unlock(current);
	}

	if (current->fs != worker->restore_fs)
		current->fs = worker->restore_fs;

	/*
	 * If we have an active mm, we need to drop the wq lock before unusing
	 * it. If we do, return true and let the caller retry the idle loop.
	 */
	if (worker->mm) {
		if (!dropped_lock) {
			__acquire(&wqe->lock);
			raw_spin_unlock_irq(&wqe->lock);
			dropped_lock = true;
		}
		__set_current_state(TASK_RUNNING);
		kthread_unuse_mm(worker->mm);
		mmput(worker->mm);
		worker->mm = NULL;
	}

#ifdef CONFIG_BLK_CGROUP
	if (worker->blkcg_css) {
		kthread_associate_blkcg(NULL);
		worker->blkcg_css = NULL;
	}
#endif
	if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	return dropped_lock;
}

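/*
 * Map a work item (by its UNBOUND flag) or a worker (by its BOUND flag) to
 * the bounded or unbounded accounting slot of the wqe.
 */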
static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
						   struct io_wq_work *work)
{
	if (work->flags & IO_WQ_WORK_UNBOUND)
		return &wqe->acct[IO_WQ_ACCT_UNBOUND];

	return &wqe->acct[IO_WQ_ACCT_BOUND];
}

static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
						  struct io_worker *worker)
{
	if (worker->flags & IO_WORKER_F_BOUND)
		return &wqe->acct[IO_WQ_ACCT_BOUND];

	return &wqe->acct[IO_WQ_ACCT_UNBOUND];
}

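/*
 * Final teardown of an exiting worker: wait for any transient references to
 * drop, undo the running/unbound accounting, unlink the worker from the wqe
 * lists and free it. The worker dropping the last wq reference completes
 * wq->done.
 */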
static void io_worker_exit(struct io_worker *worker)
{
	struct io_wqe *wqe = worker->wqe;
	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);

	/*
	 * If we're not at zero, someone else is holding a brief reference
	 * to the worker. Wait for that to go away.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	if (!refcount_dec_and_test(&worker->ref))
		schedule();
	__set_current_state(TASK_RUNNING);

	preempt_disable();
	current->flags &= ~PF_IO_WORKER;
	if (worker->flags & IO_WORKER_F_RUNNING)
		atomic_dec(&acct->nr_running);
	if (!(worker->flags & IO_WORKER_F_BOUND))
		atomic_dec(&wqe->wq->user->processes);
	worker->flags = 0;
	preempt_enable();

	raw_spin_lock_irq(&wqe->lock);
	hlist_nulls_del_rcu(&worker->nulls_node);
	list_del_rcu(&worker->all_list);
	if (__io_worker_unuse(wqe, worker)) {
		__release(&wqe->lock);
		raw_spin_lock_irq(&wqe->lock);
	}
	acct->nr_workers--;
	raw_spin_unlock_irq(&wqe->lock);

	kfree_rcu(worker, rcu);
	if (refcount_dec_and_test(&wqe->wq->refs))
		complete(&wqe->wq->done);
}

static inline bool io_wqe_run_queue(struct io_wqe *wqe)
	__must_hold(wqe->lock)
{
	if (!wq_list_empty(&wqe->work_list) &&
	    !(wqe->flags & IO_WQE_FLAG_STALLED))
		return true;
	return false;
}

/*
 * Check head of free list for an available worker. If one isn't available,
 * caller must wake up the wq manager to create one.
 */
static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
	__must_hold(RCU)
{
	struct hlist_nulls_node *n;
	struct io_worker *worker;

	n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
	if (is_a_nulls(n))
		return false;

	worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
	if (io_worker_get(worker)) {
		wake_up_process(worker->task);
		io_worker_release(worker);
		return true;
	}

	return false;
}

/*
 * We need a worker. If we find a free one, we're good. If not, and we're
 * below the max number of workers, wake up the manager to create one.
 */
static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
{
	bool ret;

	/*
	 * Most likely an attempt to queue unbounded work on an io_wq that
	 * wasn't set up with any unbounded workers.
	 */
	if (unlikely(!acct->max_workers))
		pr_warn_once("io-wq is not configured for unbound workers");

	rcu_read_lock();
	ret = io_wqe_activate_free_worker(wqe);
	rcu_read_unlock();

	if (!ret && acct->nr_workers < acct->max_workers)
		wake_up_process(wqe->wq->manager);
}

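/*
 * nr_running tracks how many workers of this accounting class are currently
 * runnable. When the count drops to zero while runnable work is still
 * queued, we try to wake or create another worker so the queue keeps making
 * progress.
 */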
static void io_wqe_inc_running(struct io_wqe *wqe, struct io_worker *worker)
{
	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);

	atomic_inc(&acct->nr_running);
}

static void io_wqe_dec_running(struct io_wqe *wqe, struct io_worker *worker)
	__must_hold(wqe->lock)
{
	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);

	if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe))
		io_wqe_wake_worker(wqe, acct);
}

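/*
 * One-time setup when a worker thread starts: allow SIGINT for cancellation,
 * mark the task as an io-wq worker and snapshot the task state (files,
 * nsproxy, fs) that per-work impersonation may later override.
 */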
static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
{
	allow_kernel_signal(SIGINT);

	current->flags |= PF_IO_WORKER;

	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
	worker->restore_files = current->files;
	worker->restore_nsproxy = current->nsproxy;
	worker->restore_fs = current->fs;
	io_wqe_inc_running(wqe, worker);
}

/*
 * Worker will start processing some work. Remove it from the free list,
 * if it's currently on it, and update the bound/unbound accounting if the
 * type of work changed.
 */
static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
			     struct io_wq_work *work)
	__must_hold(wqe->lock)
{
	bool worker_bound, work_bound;

	if (worker->flags & IO_WORKER_F_FREE) {
		worker->flags &= ~IO_WORKER_F_FREE;
		hlist_nulls_del_init_rcu(&worker->nulls_node);
	}

	/*
	 * If worker is moving from bound to unbound (or vice versa), then
	 * ensure we update the running accounting.
	 */
	worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
	work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
	if (worker_bound != work_bound) {
		io_wqe_dec_running(wqe, worker);
		if (work_bound) {
			worker->flags |= IO_WORKER_F_BOUND;
			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
			atomic_dec(&wqe->wq->user->processes);
		} else {
			worker->flags &= ~IO_WORKER_F_BOUND;
			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
			atomic_inc(&wqe->wq->user->processes);
		}
		io_wqe_inc_running(wqe, worker);
	}
}

/*
 * No work, worker going to sleep. Move to freelist, and unuse mm if we
 * have one attached. Dropping the mm may potentially sleep, so we drop
 * the lock in that case and return success. Since the caller has to
 * retry the loop in that case (we changed task state), we don't regrab
 * the lock if we return success.
 */
static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
	__must_hold(wqe->lock)
{
	if (!(worker->flags & IO_WORKER_F_FREE)) {
		worker->flags |= IO_WORKER_F_FREE;
		hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
	}

	return __io_worker_unuse(wqe, worker);
}

static inline unsigned int io_get_work_hash(struct io_wq_work *work)
{
	return work->flags >> IO_WQ_HASH_SHIFT;
}

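/*
 * Pick the next runnable work item off the wqe list. Unhashed work can run
 * right away; hashed work only runs if no other work with the same hash is
 * in flight, in which case the whole [work, tail] chain for that hash is
 * spliced out at once.
 */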
static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
	__must_hold(wqe->lock)
{
	struct io_wq_work_node *node, *prev;
	struct io_wq_work *work, *tail;
	unsigned int hash;

	wq_list_for_each(node, prev, &wqe->work_list) {
		work = container_of(node, struct io_wq_work, list);

		/* not hashed, can run anytime */
		if (!io_wq_is_hashed(work)) {
			wq_list_del(&wqe->work_list, node, prev);
			return work;
		}

		/* hashed, can run if not already running */
		hash = io_get_work_hash(work);
		if (!(wqe->hash_map & BIT(hash))) {
			wqe->hash_map |= BIT(hash);
			/* all items with this hash lie in [work, tail] */
			tail = wqe->hash_tail[hash];
			wqe->hash_tail[hash] = NULL;
			wq_list_cut(&wqe->work_list, &tail->list, prev);
			return work;
		}
	}

	return NULL;
}

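/*
 * Adopt the mm of the submitting task for this work item. If that mm is
 * already gone, flag the work for cancellation instead of running it
 * without a valid address space.
 */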
static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
{
	if (worker->mm) {
		kthread_unuse_mm(worker->mm);
		mmput(worker->mm);
		worker->mm = NULL;
	}

	if (mmget_not_zero(work->identity->mm)) {
		kthread_use_mm(work->identity->mm);
		worker->mm = work->identity->mm;
		return;
	}

	/* failed grabbing mm, ensure work gets cancelled */
	work->flags |= IO_WQ_WORK_CANCEL;
}

static inline void io_wq_switch_blkcg(struct io_worker *worker,
				      struct io_wq_work *work)
{
#ifdef CONFIG_BLK_CGROUP
	if (!(work->flags & IO_WQ_WORK_BLKCG))
		return;
	if (work->identity->blkcg_css != worker->blkcg_css) {
		kthread_associate_blkcg(work->identity->blkcg_css);
		worker->blkcg_css = work->identity->blkcg_css;
	}
#endif
}

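/*
 * Run the work item with the credentials of the submitting task. The first
 * switch stashes the worker's original creds in saved_creds so they can be
 * restored in __io_worker_unuse(); later switches drop the reference to the
 * creds that override_creds() returns.
 */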
static void io_wq_switch_creds(struct io_worker *worker,
			       struct io_wq_work *work)
{
	const struct cred *old_creds = override_creds(work->identity->creds);

	worker->cur_creds = work->identity->creds;
	if (worker->saved_creds)
		put_cred(old_creds); /* creds set by previous switch */
	else
		worker->saved_creds = old_creds;
}

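/*
 * Make the worker look like the task that submitted this work item: take
 * over files, fs, mm, creds, blkcg, audit info and RLIMIT_FSIZE as dictated
 * by the IO_WQ_WORK_* flags on the work.
 */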
static void io_impersonate_work(struct io_worker *worker,
				struct io_wq_work *work)
{
	if ((work->flags & IO_WQ_WORK_FILES) &&
	    current->files != work->identity->files) {
		task_lock(current);
		current->files = work->identity->files;
		current->nsproxy = work->identity->nsproxy;
		task_unlock(current);
		if (!work->identity->files) {
			/* failed grabbing files, ensure work gets cancelled */
			work->flags |= IO_WQ_WORK_CANCEL;
		}
	}
	if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
		current->fs = work->identity->fs;
	if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
		io_wq_switch_mm(worker, work);
	if ((work->flags & IO_WQ_WORK_CREDS) &&
	    worker->cur_creds != work->identity->creds)
		io_wq_switch_creds(worker, work);
	if (work->flags & IO_WQ_WORK_FSIZE)
		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
	else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	io_wq_switch_blkcg(worker, work);
#ifdef CONFIG_AUDIT
	current->loginuid = work->identity->loginuid;
	current->sessionid = work->identity->sessionid;
#endif
}

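/*
 * Publish the work item the worker is about to run (or NULL when done) so
 * that cancellation can find and match it via worker->cur_work.
 */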
static void io_assign_current_work(struct io_worker *worker,
				   struct io_wq_work *work)
{
	if (work) {
		/* flush pending signals before assigning new work */
		if (signal_pending(current))
			flush_signals(current);
		cond_resched();
	}

#ifdef CONFIG_AUDIT
	current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
	current->sessionid = AUDIT_SID_UNSET;
#endif

	spin_lock_irq(&worker->lock);
	worker->cur_work = work;
	spin_unlock_irq(&worker->lock);
}

static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);

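/*
 * Main work loop of a worker: pull work off the wqe list (marking the wqe
 * stalled if only blocked hashed work remains), impersonate the submitter,
 * then run the item, the rest of its hash chain, and any dependent work the
 * handler returns. Called with wqe->lock held, returns with it dropped.
 */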
static void io_worker_handle_work(struct io_worker *worker)
	__releases(wqe->lock)
{
	struct io_wqe *wqe = worker->wqe;
	struct io_wq *wq = wqe->wq;

	do {
		struct io_wq_work *work;
get_next:
		/*
		 * If we got some work, mark us as busy. If we didn't, but
		 * the list isn't empty, it means we stalled on hashed work.
		 * Mark us stalled so we don't keep looking for work when we
		 * can't make progress; any work completion or insertion will
		 * clear the stalled flag.
		 */
		work = io_get_next_work(wqe);
		if (work)
			__io_worker_busy(wqe, worker, work);
		else if (!wq_list_empty(&wqe->work_list))
			wqe->flags |= IO_WQE_FLAG_STALLED;

		raw_spin_unlock_irq(&wqe->lock);
		if (!work)
			break;
		io_assign_current_work(worker, work);

		/* handle a whole dependent link */
		do {
			struct io_wq_work *old_work, *next_hashed, *linked;
			unsigned int hash = io_get_work_hash(work);

			next_hashed = wq_next_work(work);
			io_impersonate_work(worker, work);
			/*
			 * OK to set IO_WQ_WORK_CANCEL even for uncancellable
			 * work; the worker function will do the right thing.
			 */
			if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
				work->flags |= IO_WQ_WORK_CANCEL;

			old_work = work;
			linked = wq->do_work(work);

			work = next_hashed;
			if (!work && linked && !io_wq_is_hashed(linked)) {
				work = linked;
				linked = NULL;
			}
			io_assign_current_work(worker, work);
			wq->free_work(old_work);

			if (linked)
				io_wqe_enqueue(wqe, linked);

			if (hash != -1U && !next_hashed) {
				raw_spin_lock_irq(&wqe->lock);
				wqe->hash_map &= ~BIT_ULL(hash);
				wqe->flags &= ~IO_WQE_FLAG_STALLED;
				/* skip an unnecessary unlock/lock cycle of wqe->lock */
				if (!work)
					goto get_next;
				raw_spin_unlock_irq(&wqe->lock);
			}
		} while (work);

		raw_spin_lock_irq(&wqe->lock);
	} while (1);
}

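/*
 * Per-worker thread function: run queued work when available and park on
 * the free list when idle. A worker that stays idle for WORKER_IDLE_TIMEOUT
 * exits, unless it's the node's fixed worker; on wq exit, remaining work is
 * drained before the worker tears itself down.
 */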
static int io_wqe_worker(void *data)
{
	struct io_worker *worker = data;
	struct io_wqe *wqe = worker->wqe;
	struct io_wq *wq = wqe->wq;

	io_worker_start(wqe, worker);

	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
		set_current_state(TASK_INTERRUPTIBLE);
loop:
		raw_spin_lock_irq(&wqe->lock);
		if (io_wqe_run_queue(wqe)) {
			__set_current_state(TASK_RUNNING);
			io_worker_handle_work(worker);
			goto loop;
		}
		/* drops the lock on success, retry */
		if (__io_worker_idle(wqe, worker)) {
			__release(&wqe->lock);
			goto loop;
		}
		raw_spin_unlock_irq(&wqe->lock);
		if (signal_pending(current))
			flush_signals(current);
		if (schedule_timeout(WORKER_IDLE_TIMEOUT))
			continue;
		/* timed out, exit unless we're the fixed worker */
		if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
		    !(worker->flags & IO_WORKER_F_FIXED))
			break;
	}

	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
		raw_spin_lock_irq(&wqe->lock);
		if (!wq_list_empty(&wqe->work_list))
			io_worker_handle_work(worker);
		else
			raw_spin_unlock_irq(&wqe->lock);
	}

	io_worker_exit(worker);
	return 0;
}

/*
 * Called when a worker is scheduled in. Mark us as currently running.
 */
void io_wq_worker_running(struct task_struct *tsk)
{
	struct io_worker *worker = kthread_data(tsk);
	struct io_wqe *wqe = worker->wqe;

	if (!(worker->flags & IO_WORKER_F_UP))
		return;
	if (worker->flags & IO_WORKER_F_RUNNING)
		return;
	worker->flags |= IO_WORKER_F_RUNNING;
	io_wqe_inc_running(wqe, worker);
}

/*
 * Called when worker is going to sleep. If there are no workers currently
 * running and we have work pending, wake up a free one or have the manager
 * set one up.
 */
void io_wq_worker_sleeping(struct task_struct *tsk)
{
	struct io_worker *worker = kthread_data(tsk);
	struct io_wqe *wqe = worker->wqe;

	if (!(worker->flags & IO_WORKER_F_UP))
		return;
	if (!(worker->flags & IO_WORKER_F_RUNNING))
		return;

	worker->flags &= ~IO_WORKER_F_RUNNING;

	raw_spin_lock_irq(&wqe->lock);
	io_wqe_dec_running(wqe, worker);
	raw_spin_unlock_irq(&wqe->lock);
}

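/*
 * Allocate and start a new worker kthread for the given accounting class
 * (bound or unbound), bind it to the wqe's NUMA node, and put it on the
 * free list. Returns false if allocation or thread creation fails.
 */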
static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{
	struct io_wqe_acct *acct = &wqe->acct[index];
	struct io_worker *worker;

	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
	if (!worker)
		return false;

	refcount_set(&worker->ref, 1);
	worker->nulls_node.pprev = NULL;
	worker->wqe = wqe;
	spin_lock_init(&worker->lock);

	worker->task = kthread_create_on_node(io_wqe_worker, worker, wqe->node,
				"io_wqe_worker-%d/%d", index, wqe->node);
	if (IS_ERR(worker->task)) {
		kfree(worker);
		return false;
	}
	kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));

	raw_spin_lock_irq(&wqe->lock);
	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
	list_add_tail_rcu(&worker->all_list, &wqe->all_list);
	worker->flags |= IO_WORKER_F_FREE;
	if (index == IO_WQ_ACCT_BOUND)
		worker->flags |= IO_WORKER_F_BOUND;
	if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
		worker->flags |= IO_WORKER_F_FIXED;
	acct->nr_workers++;
	raw_spin_unlock_irq(&wqe->lock);

	if (index == IO_WQ_ACCT_UNBOUND)
		atomic_inc(&wq->user->processes);

	refcount_inc(&wq->refs);
	wake_up_process(worker->task);
	return true;
}

static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
	__must_hold(wqe->lock)
{
	struct io_wqe_acct *acct = &wqe->acct[index];

	/* if we have available workers or no work, no need */
	if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
		return false;
	return acct->nr_workers < acct->max_workers;
}

static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
{
	send_sig(SIGINT, worker->task, 1);
	return false;
}

/*
 * Iterate the passed-in list and call the specified function for each
 * worker that isn't exiting.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) static bool io_wq_for_each_worker(struct io_wqe *wqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) bool (*func)(struct io_worker *, void *),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) struct io_worker *worker;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) bool ret = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) if (io_worker_get(worker)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) /* no task if node is/was offline */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) if (worker->task)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) ret = func(worker, data);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) io_worker_release(worker);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) static bool io_wq_worker_wake(struct io_worker *worker, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) wake_up_process(worker->task);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) * Manager thread. Tasked with creating new workers, if we need them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) static int io_wq_manager(void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) struct io_wq *wq = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) /* create fixed workers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) refcount_set(&wq->refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) if (!node_online(node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) set_bit(IO_WQ_BIT_ERROR, &wq->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) set_bit(IO_WQ_BIT_EXIT, &wq->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) complete(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) while (!kthread_should_stop()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) if (current->task_works)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) task_work_run();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) struct io_wqe *wqe = wq->wqes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) bool fork_worker[2] = { false, false };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) if (!node_online(node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) raw_spin_lock_irq(&wqe->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) fork_worker[IO_WQ_ACCT_BOUND] = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) fork_worker[IO_WQ_ACCT_UNBOUND] = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) raw_spin_unlock_irq(&wqe->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) if (fork_worker[IO_WQ_ACCT_BOUND])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) if (fork_worker[IO_WQ_ACCT_UNBOUND])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) set_current_state(TASK_INTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) schedule_timeout(HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) if (current->task_works)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) task_work_run();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) if (refcount_dec_and_test(&wq->refs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) complete(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) /* if ERROR is set and we get here, we have workers to wake */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) for_each_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838)
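/*
 * Check whether new work may be queued at all. Bounded work is always
 * accepted. Unbounded work is accepted if a worker of that type is already
 * running, if an idle worker is available, or if the owning user's process
 * count is still below the unbound worker limit (callers with
 * CAP_SYS_RESOURCE or CAP_SYS_ADMIN are exempt from that limit).
 */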
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) bool free_worker;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) if (!(work->flags & IO_WQ_WORK_UNBOUND))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) if (atomic_read(&acct->nr_running))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) free_worker = !hlist_nulls_empty(&wqe->free_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) if (free_worker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
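/*
 * Run a work item straight to cancellation: flag it as cancelled and push
 * it through the normal do_work/free_work callbacks so that any linked
 * work is processed and freed as well.
 */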
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) struct io_wq *wq = wqe->wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) struct io_wq_work *old_work = work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) work->flags |= IO_WQ_WORK_CANCEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) work = wq->do_work(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) wq->free_work(old_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) } while (work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874)
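/*
 * Add work to the pending list. Unhashed work goes to the tail. Hashed
 * work is chained directly after the last pending item with the same hash,
 * tracked via hash_tail[], so items sharing a hash stay in order and are
 * executed serially.
 */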
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) struct io_wq_work *tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) if (!io_wq_is_hashed(work)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) append:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) wq_list_add_tail(&work->list, &wqe->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) hash = io_get_work_hash(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) tail = wqe->hash_tail[hash];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) wqe->hash_tail[hash] = work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) if (!tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) goto append;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894)
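/*
 * Queue work on this wqe and, if needed, wake an idle worker. A wakeup is
 * issued for explicitly concurrent work, or when no worker of this
 * accounting type is currently running.
 */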
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) bool do_wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
	/*
	 * Do an early check to see if we need a new unbound worker and, if so,
	 * whether we're allowed to create one. This isn't 100% accurate, as
	 * there's a gap between this check and incrementing the count, but
	 * that's OK; it's close enough not to be an issue, and fork() has the
	 * same window.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) io_run_cancel(work, wqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) raw_spin_lock_irqsave(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) io_wqe_insert_work(wqe, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) wqe->flags &= ~IO_WQE_FLAG_STALLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) !atomic_read(&acct->nr_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) raw_spin_unlock_irqrestore(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (do_wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) io_wqe_wake_worker(wqe, acct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
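/*
 * Queue work on the wqe of the submitting CPU's NUMA node.
 */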
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) struct io_wqe *wqe = wq->wqes[numa_node_id()];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) io_wqe_enqueue(wqe, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) * Work items that hash to the same value will not be done in parallel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) * Used to limit concurrent writes, generally hashed by inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) void io_wq_hash_work(struct io_wq_work *work, void *val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) unsigned int bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) bit = hash_ptr(val, IO_WQ_HASH_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) }
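
/*
 * Illustrative example (not part of this file): a caller such as io_uring
 * can serialize buffered writes to the same file by hashing on the inode,
 * e.g.:
 *
 *	io_wq_hash_work(&req->work, file_inode(req->file));
 */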
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
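/*
 * Cancel all outstanding work: set the wq-wide cancel bit, then signal
 * every worker so currently running work gets interrupted.
 */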
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) void io_wq_cancel_all(struct io_wq *wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) set_bit(IO_WQ_BIT_CANCEL, &wq->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) struct io_wqe *wqe = wq->wqes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) struct io_cb_cancel_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) work_cancel_fn *fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) void *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) int nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) int nr_pending;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) bool cancel_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964)
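/*
 * Per-worker callback for cancellation: if the worker's current work
 * matches, signal the worker and count it as a running cancellation.
 * Returning true stops the iteration early after the first match, unless
 * all matching work is being cancelled.
 */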
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) struct io_cb_cancel_data *match = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
	/*
	 * Hold the lock to avoid ->cur_work going out of scope, as the
	 * caller may dereference the passed-in work.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) spin_lock_irqsave(&worker->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (worker->cur_work &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) match->fn(worker->cur_work, match->data)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) send_sig(SIGINT, worker->task, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) match->nr_running++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) spin_unlock_irqrestore(&worker->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) return match->nr_running && !match->cancel_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985)
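/*
 * Unlink a pending work item from the list, fixing up hash_tail[] when the
 * removed item was the last queued entry for its hash.
 */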
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) static inline void io_wqe_remove_pending(struct io_wqe *wqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) struct io_wq_work *work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) struct io_wq_work_node *prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) unsigned int hash = io_get_work_hash(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) struct io_wq_work *prev_work = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) if (prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) prev_work = container_of(prev, struct io_wq_work, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) if (prev_work && io_get_work_hash(prev_work) == hash)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) wqe->hash_tail[hash] = prev_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) wqe->hash_tail[hash] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) wq_list_del(&wqe->work_list, &work->list, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)
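/*
 * Scan the pending list and cancel every matching item. The lock must be
 * dropped to run the cancel path, so when cancelling all matches the scan
 * restarts from the head instead of continuing from a now-stale node.
 */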
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) struct io_cb_cancel_data *match)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) struct io_wq_work_node *node, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) struct io_wq_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) raw_spin_lock_irqsave(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) wq_list_for_each(node, prev, &wqe->work_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) work = container_of(node, struct io_wq_work, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) if (!match->fn(work, match->data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) io_wqe_remove_pending(wqe, work, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) raw_spin_unlock_irqrestore(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) io_run_cancel(work, wqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) match->nr_pending++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) if (!match->cancel_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /* not safe to continue after unlock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) raw_spin_unlock_irqrestore(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
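/*
 * Try to cancel matching work that is already running on a worker.
 */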
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) static void io_wqe_cancel_running_work(struct io_wqe *wqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) struct io_cb_cancel_data *match)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037)
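/*
 * Cancel work matching the given callback. Illustrative usage (caller-side
 * names are hypothetical):
 *
 *	static bool io_wq_work_match(struct io_wq_work *work, void *data)
 *	{
 *		return work == data;
 *	}
 *
 *	ret = io_wq_cancel_cb(wq, io_wq_work_match, &req->work, false);
 */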
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) void *data, bool cancel_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) struct io_cb_cancel_data match = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) .fn = cancel,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) .data = data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) .cancel_all = cancel_all,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
	/*
	 * First check the pending list; if we're lucky, the work can simply
	 * be removed from there. CANCEL_OK means the work is returned as-new
	 * and no completion will be posted for it.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) struct io_wqe *wqe = wq->wqes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) io_wqe_cancel_pending_work(wqe, &match);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) if (match.nr_pending && !match.cancel_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) return IO_WQ_CANCEL_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
	/*
	 * Now check if a free (going busy) or busy worker has the work
	 * currently running. If we find it there, return CANCEL_RUNNING to
	 * indicate that cancellation was signalled; the completion will still
	 * run normally in this case.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) struct io_wqe *wqe = wq->wqes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) io_wqe_cancel_running_work(wqe, &match);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) if (match.nr_running && !match.cancel_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) return IO_WQ_CANCEL_RUNNING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) if (match.nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) return IO_WQ_CANCEL_RUNNING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) if (match.nr_pending)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) return IO_WQ_CANCEL_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) return IO_WQ_CANCEL_NOTFOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
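/*
 * Create an io-wq instance: allocate per-node wqe structures, register for
 * CPU hotplug callbacks, then start the manager thread and wait for it to
 * create the initial bound worker on each online node.
 *
 * Illustrative caller-side setup (names are hypothetical):
 *
 *	struct io_wq_data data = {
 *		.user		= user,		/* caller already holds a ref */
 *		.free_work	= my_free_work,
 *		.do_work	= my_do_work,
 *	};
 *	struct io_wq *wq = io_wq_create(concurrency, &data);
 *
 *	if (IS_ERR(wq))
 *		return PTR_ERR(wq);
 */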
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) int ret = -ENOMEM, node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) struct io_wq *wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) if (WARN_ON_ONCE(!data->free_work || !data->do_work))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) if (WARN_ON_ONCE(!bounded))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) wq = kzalloc(sizeof(*wq), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (!wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) if (!wq->wqes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) goto err_wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) goto err_wqes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) wq->free_work = data->free_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) wq->do_work = data->do_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) /* caller must already hold a reference to this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) wq->user = data->user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) struct io_wqe *wqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) int alloc_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) if (!node_online(alloc_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) alloc_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) if (!wqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) wq->wqes[node] = wqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) wqe->node = alloc_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) if (wq->user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) task_rlimit(current, RLIMIT_NPROC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) wqe->wq = wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) raw_spin_lock_init(&wqe->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) INIT_WQ_LIST(&wqe->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) INIT_LIST_HEAD(&wqe->all_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) init_completion(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) if (!IS_ERR(wq->manager)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) wake_up_process(wq->manager);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) wait_for_completion(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) refcount_set(&wq->use_refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) reinit_completion(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) return wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) ret = PTR_ERR(wq->manager);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) complete(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) for_each_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) kfree(wq->wqes[node]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) err_wqes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) kfree(wq->wqes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) err_wq:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) kfree(wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163)
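/*
 * Take an extra reference on an existing io-wq, but only if it was created
 * with the same work callbacks that the caller expects to use.
 */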
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (data->free_work != wq->free_work || data->do_work != wq->do_work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) return refcount_inc_not_zero(&wq->use_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)
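/*
 * Tear down the io-wq: mark it as exiting, stop the manager thread, wake
 * every worker so it can observe the exit state, wait for them to finish,
 * then free the per-node state.
 */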
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) static void __io_wq_destroy(struct io_wq *wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) set_bit(IO_WQ_BIT_EXIT, &wq->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) if (wq->manager)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) kthread_stop(wq->manager);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) for_each_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) wait_for_completion(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) for_each_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) kfree(wq->wqes[node]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) kfree(wq->wqes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) kfree(wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) void io_wq_destroy(struct io_wq *wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) if (refcount_dec_and_test(&wq->use_refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) __io_wq_destroy(wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) struct task_struct *io_wq_get_task(struct io_wq *wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) return wq->manager;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205)
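/*
 * Worker callback for CPU hotplug: re-pin the worker to the CPUs of its
 * home NUMA node and keep userspace from changing its affinity.
 */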
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) struct task_struct *task = worker->task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) struct rq *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) rq = task_rq_lock(task, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) task->flags |= PF_NO_SETAFFINITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) task_rq_unlock(rq, task, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
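/*
 * CPU hotplug notifier: when a CPU comes online, refresh the allowed CPU
 * mask of every worker so the new CPU is picked up for its node.
 */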
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) for_each_node(i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)
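/*
 * Register the dynamic CPU hotplug state used to adjust worker affinity
 * as CPUs come online.
 */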
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) static __init int io_wq_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) io_wq_cpu_online, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) io_wq_online = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) subsys_initcall(io_wq_init);