Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

All lines below blame to commit 8f3ce5b39 (kx, 2023-10-28 12:00:06 +0300).

// SPDX-License-Identifier: GPL-2.0
/*
 * Basic worker thread pool for io_uring
 *
 * Copyright (C) 2019 Jens Axboe
 *
 */
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/sched/signal.h>
#include <linux/mm.h>
#include <linux/sched/mm.h>
#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/kthread.h>
#include <linux/rculist_nulls.h>
#include <linux/fs_struct.h>
#include <linux/task_work.h>
#include <linux/blk-cgroup.h>
#include <linux/audit.h>
#include <linux/cpu.h>

#include "../kernel/sched/sched.h"
#include "io-wq.h"

#define WORKER_IDLE_TIMEOUT	(5 * HZ)

enum {
	IO_WORKER_F_UP		= 1,	/* up and active */
	IO_WORKER_F_RUNNING	= 2,	/* account as running */
	IO_WORKER_F_FREE	= 4,	/* worker on free list */
	IO_WORKER_F_FIXED	= 8,	/* static idle worker */
	IO_WORKER_F_BOUND	= 16,	/* is doing bounded work */
};

enum {
	IO_WQ_BIT_EXIT		= 0,	/* wq exiting */
	IO_WQ_BIT_CANCEL	= 1,	/* cancel work on list */
	IO_WQ_BIT_ERROR		= 2,	/* error on setup */
};

enum {
	IO_WQE_FLAG_STALLED	= 1,	/* stalled on hash */
};

/*
 * One for each thread in a wqe pool
 */
struct io_worker {
	refcount_t ref;
	unsigned flags;
	struct hlist_nulls_node nulls_node;
	struct list_head all_list;
	struct task_struct *task;
	struct io_wqe *wqe;

	struct io_wq_work *cur_work;
	spinlock_t lock;

	struct rcu_head rcu;
	struct mm_struct *mm;
#ifdef CONFIG_BLK_CGROUP
	struct cgroup_subsys_state *blkcg_css;
#endif
	const struct cred *cur_creds;
	const struct cred *saved_creds;
	struct files_struct *restore_files;
	struct nsproxy *restore_nsproxy;
	struct fs_struct *restore_fs;
};

#if BITS_PER_LONG == 64
#define IO_WQ_HASH_ORDER	6
#else
#define IO_WQ_HASH_ORDER	5
#endif

#define IO_WQ_NR_HASH_BUCKETS	(1u << IO_WQ_HASH_ORDER)
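
/*
 * Note: the per-wqe hash_map below is a single unsigned long used as a
 * bitmap of busy buckets, which is why the bucket count tracks the word
 * size: 64 buckets on 64-bit kernels, 32 on 32-bit.
 */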

struct io_wqe_acct {
	unsigned nr_workers;
	unsigned max_workers;
	atomic_t nr_running;
};

enum {
	IO_WQ_ACCT_BOUND,
	IO_WQ_ACCT_UNBOUND,
};

/*
 * Per-node worker thread pool
 */
struct io_wqe {
	struct {
		raw_spinlock_t lock;
		struct io_wq_work_list work_list;
		unsigned long hash_map;
		unsigned flags;
	} ____cacheline_aligned_in_smp;

	int node;
	struct io_wqe_acct acct[2];

	struct hlist_nulls_head free_list;
	struct list_head all_list;

	struct io_wq *wq;
	struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
};

/*
 * Per io_wq state
 */
struct io_wq {
	struct io_wqe **wqes;
	unsigned long state;

	free_work_fn *free_work;
	io_wq_work_fn *do_work;

	struct task_struct *manager;
	struct user_struct *user;
	refcount_t refs;
	struct completion done;

	struct hlist_node cpuhp_node;

	refcount_t use_refs;
};

static enum cpuhp_state io_wq_online;

static bool io_worker_get(struct io_worker *worker)
{
	return refcount_inc_not_zero(&worker->ref);
}

static void io_worker_release(struct io_worker *worker)
{
	if (refcount_dec_and_test(&worker->ref))
		wake_up_process(worker->task);
}
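
/*
 * Note: the wakeup above pairs with io_worker_exit(), where an exiting
 * worker drops its own reference and sleeps until any transient holder
 * (such as the free-list scan in io_wqe_activate_free_worker()) releases
 * the last one.
 */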

/*
 * Note: drops the wqe->lock if returning true! The caller must re-acquire
 * the lock in that case. Some callers need to restart handling if this
 * happens, so we can't just re-acquire the lock on behalf of the caller.
 */
static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
{
	bool dropped_lock = false;

	if (worker->saved_creds) {
		revert_creds(worker->saved_creds);
		worker->cur_creds = worker->saved_creds = NULL;
	}

	if (current->files != worker->restore_files) {
		__acquire(&wqe->lock);
		raw_spin_unlock_irq(&wqe->lock);
		dropped_lock = true;

		task_lock(current);
		current->files = worker->restore_files;
		current->nsproxy = worker->restore_nsproxy;
		task_unlock(current);
	}

	if (current->fs != worker->restore_fs)
		current->fs = worker->restore_fs;

	/*
	 * If we have an active mm, we need to drop the wq lock before unusing
	 * it. If we do, return true and let the caller retry the idle loop.
	 */
	if (worker->mm) {
		if (!dropped_lock) {
			__acquire(&wqe->lock);
			raw_spin_unlock_irq(&wqe->lock);
			dropped_lock = true;
		}
		__set_current_state(TASK_RUNNING);
		kthread_unuse_mm(worker->mm);
		mmput(worker->mm);
		worker->mm = NULL;
	}

#ifdef CONFIG_BLK_CGROUP
	if (worker->blkcg_css) {
		kthread_associate_blkcg(NULL);
		worker->blkcg_css = NULL;
	}
#endif
	if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	return dropped_lock;
}

static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
						   struct io_wq_work *work)
{
	if (work->flags & IO_WQ_WORK_UNBOUND)
		return &wqe->acct[IO_WQ_ACCT_UNBOUND];

	return &wqe->acct[IO_WQ_ACCT_BOUND];
}

static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
						  struct io_worker *worker)
{
	if (worker->flags & IO_WORKER_F_BOUND)
		return &wqe->acct[IO_WQ_ACCT_BOUND];

	return &wqe->acct[IO_WQ_ACCT_UNBOUND];
}

static void io_worker_exit(struct io_worker *worker)
{
	struct io_wqe *wqe = worker->wqe;
	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);

	/*
	 * If we're not at zero, someone else is holding a brief reference
	 * to the worker. Wait for that to go away.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	if (!refcount_dec_and_test(&worker->ref))
		schedule();
	__set_current_state(TASK_RUNNING);

	preempt_disable();
	current->flags &= ~PF_IO_WORKER;
	if (worker->flags & IO_WORKER_F_RUNNING)
		atomic_dec(&acct->nr_running);
	if (!(worker->flags & IO_WORKER_F_BOUND))
		atomic_dec(&wqe->wq->user->processes);
	worker->flags = 0;
	preempt_enable();

	raw_spin_lock_irq(&wqe->lock);
	hlist_nulls_del_rcu(&worker->nulls_node);
	list_del_rcu(&worker->all_list);
	if (__io_worker_unuse(wqe, worker)) {
		__release(&wqe->lock);
		raw_spin_lock_irq(&wqe->lock);
	}
	acct->nr_workers--;
	raw_spin_unlock_irq(&wqe->lock);

	kfree_rcu(worker, rcu);
	if (refcount_dec_and_test(&wqe->wq->refs))
		complete(&wqe->wq->done);
}

static inline bool io_wqe_run_queue(struct io_wqe *wqe)
	__must_hold(wqe->lock)
{
	if (!wq_list_empty(&wqe->work_list) &&
	    !(wqe->flags & IO_WQE_FLAG_STALLED))
		return true;
	return false;
}
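
/*
 * Note: "runnable" here means work is queued and we are not stalled
 * waiting for a busy hash bucket; IO_WQE_FLAG_STALLED is cleared again in
 * io_worker_handle_work() once a hashed chain completes.
 */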

/*
 * Check head of free list for an available worker. If one isn't available,
 * caller must wake up the wq manager to create one.
 */
static bool io_wqe_activate_free_worker(struct io_wqe *wqe)
	__must_hold(RCU)
{
	struct hlist_nulls_node *n;
	struct io_worker *worker;

	n = rcu_dereference(hlist_nulls_first_rcu(&wqe->free_list));
	if (is_a_nulls(n))
		return false;

	worker = hlist_nulls_entry(n, struct io_worker, nulls_node);
	if (io_worker_get(worker)) {
		wake_up_process(worker->task);
		io_worker_release(worker);
		return true;
	}

	return false;
}

/*
 * We need a worker. If we find a free one, we're good. If not, and we're
 * below the max number of workers, wake up the manager to create one.
 */
static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
{
	bool ret;

	/*
	 * Most likely an attempt to queue unbounded work on an io_wq that
	 * wasn't setup with any unbounded workers.
	 */
	if (unlikely(!acct->max_workers))
		pr_warn_once("io-wq is not configured for unbound workers");

	rcu_read_lock();
	ret = io_wqe_activate_free_worker(wqe);
	rcu_read_unlock();

	if (!ret && acct->nr_workers < acct->max_workers)
		wake_up_process(wqe->wq->manager);
}

static void io_wqe_inc_running(struct io_wqe *wqe, struct io_worker *worker)
{
	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);

	atomic_inc(&acct->nr_running);
}

static void io_wqe_dec_running(struct io_wqe *wqe, struct io_worker *worker)
	__must_hold(wqe->lock)
{
	struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);

	if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe))
		io_wqe_wake_worker(wqe, acct);
}
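
/*
 * Note: the wakeup in io_wqe_dec_running() matters most on the
 * io_wq_worker_sleeping() path: when the last running worker for an
 * accounting class is about to block while the queue is still runnable,
 * a free worker is woken (or the manager is kicked to create one) so the
 * list keeps draining.
 */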

static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
{
	allow_kernel_signal(SIGINT);

	current->flags |= PF_IO_WORKER;

	worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
	worker->restore_files = current->files;
	worker->restore_nsproxy = current->nsproxy;
	worker->restore_fs = current->fs;
	io_wqe_inc_running(wqe, worker);
}

/*
 * Worker will start processing some work. Move it to the busy list, if
 * it's currently on the freelist
 */
static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
			     struct io_wq_work *work)
	__must_hold(wqe->lock)
{
	bool worker_bound, work_bound;

	if (worker->flags & IO_WORKER_F_FREE) {
		worker->flags &= ~IO_WORKER_F_FREE;
		hlist_nulls_del_init_rcu(&worker->nulls_node);
	}

	/*
	 * If worker is moving from bound to unbound (or vice versa), then
	 * ensure we update the running accounting.
	 */
	worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
	work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
	if (worker_bound != work_bound) {
		io_wqe_dec_running(wqe, worker);
		if (work_bound) {
			worker->flags |= IO_WORKER_F_BOUND;
			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
			atomic_dec(&wqe->wq->user->processes);
		} else {
			worker->flags &= ~IO_WORKER_F_BOUND;
			wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
			wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
			atomic_inc(&wqe->wq->user->processes);
		}
		io_wqe_inc_running(wqe, worker);
	}
}

/*
 * No work, worker going to sleep. Move to freelist, and unuse mm if we
 * have one attached. Dropping the mm may potentially sleep, so we drop
 * the lock in that case and return success. Since the caller has to
 * retry the loop in that case (we changed task state), we don't regrab
 * the lock if we return success.
 */
static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
	__must_hold(wqe->lock)
{
	if (!(worker->flags & IO_WORKER_F_FREE)) {
		worker->flags |= IO_WORKER_F_FREE;
		hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
	}

	return __io_worker_unuse(wqe, worker);
}

static inline unsigned int io_get_work_hash(struct io_wq_work *work)
{
	return work->flags >> IO_WQ_HASH_SHIFT;
}

static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
	__must_hold(wqe->lock)
{
	struct io_wq_work_node *node, *prev;
	struct io_wq_work *work, *tail;
	unsigned int hash;

	wq_list_for_each(node, prev, &wqe->work_list) {
		work = container_of(node, struct io_wq_work, list);

		/* not hashed, can run anytime */
		if (!io_wq_is_hashed(work)) {
			wq_list_del(&wqe->work_list, node, prev);
			return work;
		}

		/* hashed, can run if not already running */
		hash = io_get_work_hash(work);
		if (!(wqe->hash_map & BIT(hash))) {
			wqe->hash_map |= BIT(hash);
			/* all items with this hash lie in [work, tail] */
			tail = wqe->hash_tail[hash];
			wqe->hash_tail[hash] = NULL;
			wq_list_cut(&wqe->work_list, &tail->list, prev);
			return work;
		}
	}

	return NULL;
}
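
/*
 * Illustrative example (not from the original source): with a queue of
 * A1 A2 A3 B, where the A's hash to bucket h and B is unhashed, the
 * first worker to reach A1 sets bit h in hash_map and splices out the
 * whole [A1, hash_tail[h] == A3] span to run serially, leaving B for
 * other workers. Same-hash items are kept contiguous on the enqueue
 * side, which is what makes the single wq_list_cut() above sufficient.
 */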

static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
{
	if (worker->mm) {
		kthread_unuse_mm(worker->mm);
		mmput(worker->mm);
		worker->mm = NULL;
	}

	if (mmget_not_zero(work->identity->mm)) {
		kthread_use_mm(work->identity->mm);
		worker->mm = work->identity->mm;
		return;
	}

	/* failed grabbing mm, ensure work gets cancelled */
	work->flags |= IO_WQ_WORK_CANCEL;
}

static inline void io_wq_switch_blkcg(struct io_worker *worker,
				      struct io_wq_work *work)
{
#ifdef CONFIG_BLK_CGROUP
	if (!(work->flags & IO_WQ_WORK_BLKCG))
		return;
	if (work->identity->blkcg_css != worker->blkcg_css) {
		kthread_associate_blkcg(work->identity->blkcg_css);
		worker->blkcg_css = work->identity->blkcg_css;
	}
#endif
}

static void io_wq_switch_creds(struct io_worker *worker,
			       struct io_wq_work *work)
{
	const struct cred *old_creds = override_creds(work->identity->creds);

	worker->cur_creds = work->identity->creds;
	if (worker->saved_creds)
		put_cred(old_creds); /* creds set by previous switch */
	else
		worker->saved_creds = old_creds;
}
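
/*
 * Note: saved_creds always holds the creds that were active before the
 * first override. Later switches drop the intermediate reference
 * returned by override_creds(), so __io_worker_unuse() can restore the
 * original identity with a single revert_creds().
 */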

static void io_impersonate_work(struct io_worker *worker,
				struct io_wq_work *work)
{
	if ((work->flags & IO_WQ_WORK_FILES) &&
	    current->files != work->identity->files) {
		task_lock(current);
		current->files = work->identity->files;
		current->nsproxy = work->identity->nsproxy;
		task_unlock(current);
		if (!work->identity->files) {
			/* failed grabbing files, ensure work gets cancelled */
			work->flags |= IO_WQ_WORK_CANCEL;
		}
	}
	if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
		current->fs = work->identity->fs;
	if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
		io_wq_switch_mm(worker, work);
	if ((work->flags & IO_WQ_WORK_CREDS) &&
	    worker->cur_creds != work->identity->creds)
		io_wq_switch_creds(worker, work);
	if (work->flags & IO_WQ_WORK_FSIZE)
		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
	else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
		current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
	io_wq_switch_blkcg(worker, work);
#ifdef CONFIG_AUDIT
	current->loginuid = work->identity->loginuid;
	current->sessionid = work->identity->sessionid;
#endif
}

static void io_assign_current_work(struct io_worker *worker,
				   struct io_wq_work *work)
{
	if (work) {
		/* flush pending signals before assigning new work */
		if (signal_pending(current))
			flush_signals(current);
		cond_resched();
	}

#ifdef CONFIG_AUDIT
	current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
	current->sessionid = AUDIT_SID_UNSET;
#endif

	spin_lock_irq(&worker->lock);
	worker->cur_work = work;
	spin_unlock_irq(&worker->lock);
}
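
/*
 * Note: cur_work is updated under worker->lock so that other contexts
 * (such as work cancellation) can safely inspect which work a worker is
 * currently processing.
 */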

static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work);

static void io_worker_handle_work(struct io_worker *worker)
	__releases(wqe->lock)
{
	struct io_wqe *wqe = worker->wqe;
	struct io_wq *wq = wqe->wq;

	do {
		struct io_wq_work *work;
get_next:
		/*
		 * If we got some work, mark us as busy. If we didn't, but
		 * the list isn't empty, it means we stalled on hashed work.
		 * Mark us stalled so we don't keep looking for work when we
		 * can't make progress, any work completion or insertion will
		 * clear the stalled flag.
		 */
		work = io_get_next_work(wqe);
		if (work)
			__io_worker_busy(wqe, worker, work);
		else if (!wq_list_empty(&wqe->work_list))
			wqe->flags |= IO_WQE_FLAG_STALLED;

		raw_spin_unlock_irq(&wqe->lock);
		if (!work)
			break;
		io_assign_current_work(worker, work);

		/* handle a whole dependent link */
		do {
			struct io_wq_work *old_work, *next_hashed, *linked;
			unsigned int hash = io_get_work_hash(work);

			next_hashed = wq_next_work(work);
			io_impersonate_work(worker, work);
			/*
			 * OK to set IO_WQ_WORK_CANCEL even for uncancellable
			 * work, the worker function will do the right thing.
			 */
			if (test_bit(IO_WQ_BIT_CANCEL, &wq->state))
				work->flags |= IO_WQ_WORK_CANCEL;

			old_work = work;
			linked = wq->do_work(work);

			work = next_hashed;
			if (!work && linked && !io_wq_is_hashed(linked)) {
				work = linked;
				linked = NULL;
			}
			io_assign_current_work(worker, work);
			wq->free_work(old_work);

			if (linked)
				io_wqe_enqueue(wqe, linked);

			if (hash != -1U && !next_hashed) {
				raw_spin_lock_irq(&wqe->lock);
				wqe->hash_map &= ~BIT_ULL(hash);
				wqe->flags &= ~IO_WQE_FLAG_STALLED;
				/* skip unnecessary unlock-lock wqe->lock */
				if (!work)
					goto get_next;
				raw_spin_unlock_irq(&wqe->lock);
			}
		} while (work);

		raw_spin_lock_irq(&wqe->lock);
	} while (1);
}
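
/*
 * Note: the inner loop above juggles two kinds of follow-on work:
 * next_hashed (the spliced same-hash chain) takes priority, while a
 * linked (dependent) work returned by do_work() runs next only if no
 * hashed work remains and it is not itself hashed; otherwise it is
 * re-enqueued for another worker.
 */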

static int io_wqe_worker(void *data)
{
	struct io_worker *worker = data;
	struct io_wqe *wqe = worker->wqe;
	struct io_wq *wq = wqe->wq;

	io_worker_start(wqe, worker);

	while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
		set_current_state(TASK_INTERRUPTIBLE);
loop:
		raw_spin_lock_irq(&wqe->lock);
		if (io_wqe_run_queue(wqe)) {
			__set_current_state(TASK_RUNNING);
			io_worker_handle_work(worker);
			goto loop;
		}
		/* drops the lock on success, retry */
		if (__io_worker_idle(wqe, worker)) {
			__release(&wqe->lock);
			goto loop;
		}
		raw_spin_unlock_irq(&wqe->lock);
		if (signal_pending(current))
			flush_signals(current);
		if (schedule_timeout(WORKER_IDLE_TIMEOUT))
			continue;
		/* timed out, exit unless we're the fixed worker */
		if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
		    !(worker->flags & IO_WORKER_F_FIXED))
			break;
	}

	if (test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
		raw_spin_lock_irq(&wqe->lock);
		if (!wq_list_empty(&wqe->work_list))
			io_worker_handle_work(worker);
		else
			raw_spin_unlock_irq(&wqe->lock);
	}

	io_worker_exit(worker);
	return 0;
}

/*
 * Called when a worker is scheduled in. Mark us as currently running.
 */
void io_wq_worker_running(struct task_struct *tsk)
{
	struct io_worker *worker = kthread_data(tsk);
	struct io_wqe *wqe = worker->wqe;

	if (!(worker->flags & IO_WORKER_F_UP))
		return;
	if (worker->flags & IO_WORKER_F_RUNNING)
		return;
	worker->flags |= IO_WORKER_F_RUNNING;
	io_wqe_inc_running(wqe, worker);
}

/*
 * Called when worker is going to sleep. If there are no workers currently
 * running and we have work pending, wake up a free one or have the manager
 * set one up.
 */
void io_wq_worker_sleeping(struct task_struct *tsk)
{
	struct io_worker *worker = kthread_data(tsk);
	struct io_wqe *wqe = worker->wqe;

	if (!(worker->flags & IO_WORKER_F_UP))
		return;
	if (!(worker->flags & IO_WORKER_F_RUNNING))
		return;

	worker->flags &= ~IO_WORKER_F_RUNNING;

	raw_spin_lock_irq(&wqe->lock);
	io_wqe_dec_running(wqe, worker);
	raw_spin_unlock_irq(&wqe->lock);
}
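
/*
 * Note: both hooks above are invoked from the scheduler when a task
 * flagged PF_IO_WORKER blocks or resumes, which is how nr_running stays
 * accurate without the workers doing any explicit bookkeeping of their
 * own.
 */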

static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
{
	struct io_wqe_acct *acct = &wqe->acct[index];
	struct io_worker *worker;

	worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
	if (!worker)
		return false;

	refcount_set(&worker->ref, 1);
	worker->nulls_node.pprev = NULL;
	worker->wqe = wqe;
	spin_lock_init(&worker->lock);

	worker->task = kthread_create_on_node(io_wqe_worker, worker, wqe->node,
				"io_wqe_worker-%d/%d", index, wqe->node);
	if (IS_ERR(worker->task)) {
		kfree(worker);
		return false;
	}
	kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));

	raw_spin_lock_irq(&wqe->lock);
	hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
	list_add_tail_rcu(&worker->all_list, &wqe->all_list);
	worker->flags |= IO_WORKER_F_FREE;
	if (index == IO_WQ_ACCT_BOUND)
		worker->flags |= IO_WORKER_F_BOUND;
	if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
		worker->flags |= IO_WORKER_F_FIXED;
	acct->nr_workers++;
	raw_spin_unlock_irq(&wqe->lock);

	if (index == IO_WQ_ACCT_UNBOUND)
		atomic_inc(&wq->user->processes);

	refcount_inc(&wq->refs);
	wake_up_process(worker->task);
	return true;
}
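
/*
 * Note: the first bound worker created on each node gets
 * IO_WORKER_F_FIXED and therefore ignores the idle timeout in
 * io_wqe_worker(), so at least one worker always survives to pick up new
 * work.
 */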

static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
	__must_hold(wqe->lock)
{
	struct io_wqe_acct *acct = &wqe->acct[index];

	/* if we have available workers or no work, no need */
	if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
		return false;
	return acct->nr_workers < acct->max_workers;
}

static bool io_wqe_worker_send_sig(struct io_worker *worker, void *data)
{
	send_sig(SIGINT, worker->task, 1);
	return false;
}

/*
 * Iterate the passed in list and call the specific function for each
 * worker that isn't exiting
 */
static bool io_wq_for_each_worker(struct io_wqe *wqe,
				  bool (*func)(struct io_worker *, void *),
				  void *data)
{
	struct io_worker *worker;
	bool ret = false;

	list_for_each_entry_rcu(worker, &wqe->all_list, all_list) {
		if (io_worker_get(worker)) {
			/* no task if node is/was offline */
			if (worker->task)
				ret = func(worker, data);
			io_worker_release(worker);
			if (ret)
				break;
		}
	}

	return ret;
}

static bool io_wq_worker_wake(struct io_worker *worker, void *data)
{
	wake_up_process(worker->task);
	return false;
}
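
/*
 * Note: io_wqe_worker_send_sig() and io_wq_worker_wake() return false
 * deliberately: a false return from the per-worker callback tells
 * io_wq_for_each_worker() to keep iterating, so the signal or wakeup
 * reaches every worker on the node.
 */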

/*
 * Manager thread. Tasked with creating new workers, if we need them.
 */
static int io_wq_manager(void *data)
{
	struct io_wq *wq = data;
	int node;

	/* create fixed workers */
	refcount_set(&wq->refs, 1);
	for_each_node(node) {
		if (!node_online(node))
			continue;
		if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
			continue;
		set_bit(IO_WQ_BIT_ERROR, &wq->state);
		set_bit(IO_WQ_BIT_EXIT, &wq->state);
		goto out;
	}

	complete(&wq->done);

	while (!kthread_should_stop()) {
		if (current->task_works)
			task_work_run();

		for_each_node(node) {
			struct io_wqe *wqe = wq->wqes[node];
			bool fork_worker[2] = { false, false };

			if (!node_online(node))
				continue;

			raw_spin_lock_irq(&wqe->lock);
			if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
				fork_worker[IO_WQ_ACCT_BOUND] = true;
			if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
				fork_worker[IO_WQ_ACCT_UNBOUND] = true;
			raw_spin_unlock_irq(&wqe->lock);
			if (fork_worker[IO_WQ_ACCT_BOUND])
				create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
			if (fork_worker[IO_WQ_ACCT_UNBOUND])
				create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
		}
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(HZ);
	}

	if (current->task_works)
		task_work_run();

out:
	if (refcount_dec_and_test(&wq->refs)) {
		complete(&wq->done);
		return 0;
	}
	/* if ERROR is set and we get here, we have workers to wake */
	if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
		rcu_read_lock();
		for_each_node(node)
			io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 			    struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 	bool free_worker;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	if (!(work->flags & IO_WQ_WORK_UNBOUND))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 	if (atomic_read(&acct->nr_running))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	free_worker = !hlist_nulls_empty(&wqe->free_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 	if (free_worker)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	    !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 
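/*
 * Worked example of the check above, with assumed numbers: an
 * unprivileged task whose user already runs RLIMIT_NPROC-many
 * processes, with no unbound worker running and none on the free list,
 * gets new unbound work refused here; io_wqe_enqueue() then
 * cancel-completes the work instead of queueing it. Bounded work is
 * never refused by this path.
 */
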
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	struct io_wq *wq = wqe->wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 		struct io_wq_work *old_work = work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 		work->flags |= IO_WQ_WORK_CANCEL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 		work = wq->do_work(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		wq->free_work(old_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 	} while (work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 
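/*
 * Hedged sketch of the do_work()/free_work() contract the cancel loop
 * above relies on: do_work() runs (or cancel-completes) one item and
 * may hand back a follow-up item, e.g. a linked request, which is then
 * cancelled as well. These callbacks are illustrative stand-ins, not
 * io_uring's real ones.
 */
static struct io_wq_work *example_do_work(struct io_wq_work *work)
{
	if (work->flags & IO_WQ_WORK_CANCEL) {
		/* a real callback would complete the item with -ECANCELED */
		return NULL;	/* nothing chained in this sketch */
	}
	/* ... execute the request here ... */
	return NULL;
}

static void example_free_work(struct io_wq_work *work)
{
	/* a real callback would drop the containing request's reference */
}
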
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) static void io_wqe_insert_work(struct io_wqe *wqe, struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	unsigned int hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	struct io_wq_work *tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	if (!io_wq_is_hashed(work)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) append:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 		wq_list_add_tail(&work->list, &wqe->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	hash = io_get_work_hash(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	tail = wqe->hash_tail[hash];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	wqe->hash_tail[hash] = work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	if (!tail)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		goto append;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 
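/*
 * Illustration of the hash-tail bookkeeping above: with items A and B
 * for the same hash h already queued, hash_tail[h] points at B; a new
 * item C for h is spliced in directly after B (and hash_tail[h] then
 * points at C), keeping all work for one hash contiguous so a worker
 * can execute it serially:
 *
 *	work_list:   ... -> A -> B -> C -> ...
 *	hash_tail[h] ------------------^
 */
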
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 	struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	bool do_wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	 * Do an early check to see if we need a new unbound worker, and if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	 * we do, whether we're allowed to create one. This isn't 100%
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	 * accurate, as there's a gap between this check and incrementing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	 * value, but it's close enough not to be an issue; fork() has the same delay.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 		io_run_cancel(work, wqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	raw_spin_lock_irqsave(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	io_wqe_insert_work(wqe, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	wqe->flags &= ~IO_WQE_FLAG_STALLED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	do_wake = (work->flags & IO_WQ_WORK_CONCURRENT) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 			!atomic_read(&acct->nr_running);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 	raw_spin_unlock_irqrestore(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	if (do_wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 		io_wqe_wake_worker(wqe, acct);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	struct io_wqe *wqe = wq->wqes[numa_node_id()];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	io_wqe_enqueue(wqe, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931)  * Work items that hash to the same value will not be done in parallel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932)  * Used to limit concurrent writes, generally hashed by inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) void io_wq_hash_work(struct io_wq_work *work, void *val)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	unsigned int bit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	bit = hash_ptr(val, IO_WQ_HASH_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 
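/*
 * Minimal usage sketch, assuming a caller that wants writes to the same
 * file executed serially: hash the work by inode before queueing it.
 * The helper is hypothetical; io_uring hashes its buffered-write work
 * in a similar way.
 */
static void example_queue_serial_write(struct io_wq *wq,
				       struct io_wq_work *work,
				       struct inode *inode)
{
	io_wq_hash_work(work, inode);	/* same inode => same hash bucket */
	io_wq_enqueue(wq, work);	/* serial with other work for inode */
}
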
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) void io_wq_cancel_all(struct io_wq *wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	set_bit(IO_WQ_BIT_CANCEL, &wq->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 		struct io_wqe *wqe = wq->wqes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 		io_wq_for_each_worker(wqe, io_wqe_worker_send_sig, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) struct io_cb_cancel_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	work_cancel_fn *fn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	void *data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	int nr_running;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 	int nr_pending;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	bool cancel_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 	struct io_cb_cancel_data *match = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 	 * Hold the lock to avoid ->cur_work going out of scope; the caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	 * may dereference the passed-in work.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	spin_lock_irqsave(&worker->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 	if (worker->cur_work &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	    !(worker->cur_work->flags & IO_WQ_WORK_NO_CANCEL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	    match->fn(worker->cur_work, match->data)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 		send_sig(SIGINT, worker->task, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 		match->nr_running++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	spin_unlock_irqrestore(&worker->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	return match->nr_running && !match->cancel_all;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) static inline void io_wqe_remove_pending(struct io_wqe *wqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 					 struct io_wq_work *work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 					 struct io_wq_work_node *prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	unsigned int hash = io_get_work_hash(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	struct io_wq_work *prev_work = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	if (io_wq_is_hashed(work) && work == wqe->hash_tail[hash]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 		if (prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 			prev_work = container_of(prev, struct io_wq_work, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 		if (prev_work && io_get_work_hash(prev_work) == hash)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 			wqe->hash_tail[hash] = prev_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 			wqe->hash_tail[hash] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	wq_list_del(&wqe->work_list, &work->list, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 				       struct io_cb_cancel_data *match)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	struct io_wq_work_node *node, *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	struct io_wq_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	raw_spin_lock_irqsave(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	wq_list_for_each(node, prev, &wqe->work_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 		work = container_of(node, struct io_wq_work, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 		if (!match->fn(work, match->data))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 		io_wqe_remove_pending(wqe, work, prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 		raw_spin_unlock_irqrestore(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 		io_run_cancel(work, wqe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 		match->nr_pending++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 		if (!match->cancel_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 		/* not safe to continue after unlock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 		goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	raw_spin_unlock_irqrestore(&wqe->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) static void io_wqe_cancel_running_work(struct io_wqe *wqe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 				       struct io_cb_cancel_data *match)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	io_wq_for_each_worker(wqe, io_wq_worker_cancel, match);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 				  void *data, bool cancel_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	struct io_cb_cancel_data match = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 		.fn		= cancel,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 		.data		= data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 		.cancel_all	= cancel_all,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	 * First check the pending list; if we're lucky we can just remove
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	 * the work from there. CANCEL_OK means that the work is returned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	 * as-new, and no completion will be posted for it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 		struct io_wqe *wqe = wq->wqes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 		io_wqe_cancel_pending_work(wqe, &match);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		if (match.nr_pending && !match.cancel_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 			return IO_WQ_CANCEL_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	 * Now check if a free (going busy) or busy worker has the work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	 * currently running. If we find it there, we'll return CANCEL_RUNNING
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	 * as an indication that we attempted to signal cancellation. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	 * completion will run normally in this case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 		struct io_wqe *wqe = wq->wqes[node];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 		io_wqe_cancel_running_work(wqe, &match);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 		if (match.nr_running && !match.cancel_all)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 			return IO_WQ_CANCEL_RUNNING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	if (match.nr_running)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 		return IO_WQ_CANCEL_RUNNING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	if (match.nr_pending)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 		return IO_WQ_CANCEL_OK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	return IO_WQ_CANCEL_NOTFOUND;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 
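/*
 * Sketch of driving io_wq_cancel_cb(): the match callback inspects each
 * pending or running item and returns true for those to cancel. This
 * matcher keys off the hashed flag purely for illustration; real users
 * typically match on fields of the request containing the io_wq_work.
 */
static bool example_match_hashed(struct io_wq_work *work, void *data)
{
	return io_wq_is_hashed(work);
}

static void example_cancel_all_hashed(struct io_wq *wq)
{
	/* cancel_all=true: keep cancelling past the first match */
	io_wq_cancel_cb(wq, example_match_hashed, NULL, true);
}
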
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 	int ret = -ENOMEM, node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	struct io_wq *wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	if (WARN_ON_ONCE(!data->free_work || !data->do_work))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	if (WARN_ON_ONCE(!bounded))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	if (!wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	wq->wqes = kcalloc(nr_node_ids, sizeof(struct io_wqe *), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	if (!wq->wqes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 		goto err_wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 		goto err_wqes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 	wq->free_work = data->free_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	wq->do_work = data->do_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	/* caller must already hold a reference to this */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 	wq->user = data->user;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	for_each_node(node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 		struct io_wqe *wqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 		int alloc_node = node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 		if (!node_online(alloc_node))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 			alloc_node = NUMA_NO_NODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 		wqe = kzalloc_node(sizeof(struct io_wqe), GFP_KERNEL, alloc_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 		if (!wqe)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 		wq->wqes[node] = wqe;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 		wqe->node = alloc_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 		wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 		atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 		if (wq->user) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 			wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 					task_rlimit(current, RLIMIT_NPROC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 		atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 		wqe->wq = wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 		raw_spin_lock_init(&wqe->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 		INIT_WQ_LIST(&wqe->work_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 		INIT_HLIST_NULLS_HEAD(&wqe->free_list, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 		INIT_LIST_HEAD(&wqe->all_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	init_completion(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	if (!IS_ERR(wq->manager)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 		wake_up_process(wq->manager);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 		wait_for_completion(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 		if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 			ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 			goto err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 		refcount_set(&wq->use_refs, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 		reinit_completion(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 		return wq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	ret = PTR_ERR(wq->manager);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	complete(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	for_each_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 		kfree(wq->wqes[node]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) err_wqes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	kfree(wq->wqes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) err_wq:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	kfree(wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 
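/*
 * Bring-up sketch under stated assumptions: the caller supplies the two
 * mandatory callbacks (the illustrative example_* stand-ins from above)
 * and a per-node cap on bounded workers. .user is left NULL in this
 * sketch, which is only safe if no unbound work is ever queued.
 */
static int example_start_pool(struct io_wq **out)
{
	struct io_wq_data data = {
		.free_work	= example_free_work,
		.do_work	= example_do_work,
	};
	struct io_wq *wq;

	wq = io_wq_create(4, &data);	/* at most 4 bounded workers per node */
	if (IS_ERR(wq))
		return PTR_ERR(wq);
	*out = wq;
	return 0;
}
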
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	if (data->free_work != wq->free_work || data->do_work != wq->do_work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	return refcount_inc_not_zero(&wq->use_refs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 
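/*
 * io_wq_get() pairs with io_wq_destroy() like get/put: a second user
 * may share an existing pool only if its callbacks match. A hedged
 * sketch of that sharing pattern:
 */
static struct io_wq *example_get_or_create(struct io_wq *existing,
					   struct io_wq_data *data)
{
	if (existing && io_wq_get(existing, data))
		return existing;	/* shared; release via io_wq_destroy() */
	return io_wq_create(4, data);	/* otherwise create a fresh pool */
}
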
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) static void __io_wq_destroy(struct io_wq *wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	int node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	set_bit(IO_WQ_BIT_EXIT, &wq->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	if (wq->manager)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 		kthread_stop(wq->manager);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	for_each_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 		io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	wait_for_completion(&wq->done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	for_each_node(node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		kfree(wq->wqes[node]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	kfree(wq->wqes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	kfree(wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) void io_wq_destroy(struct io_wq *wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	if (refcount_dec_and_test(&wq->use_refs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 		__io_wq_destroy(wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) struct task_struct *io_wq_get_task(struct io_wq *wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	return wq->manager;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	struct task_struct *task = worker->task;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	struct rq_flags rf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	struct rq *rq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	rq = task_rq_lock(task, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 	do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	task->flags |= PF_NO_SETAFFINITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	task_rq_unlock(rq, task, &rf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	for_each_node(i)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) static __init int io_wq_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 					io_wq_cpu_online, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	io_wq_online = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) subsys_initcall(io_wq_init);