// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2019 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_trace.h"
#include "xfs_sysctl.h"
#include "xfs_pwork.h"
#include <linux/nmi.h>

/*
 * Parallel Work Queue
 * ===================
 *
 * Abstract away the details of running a large and "obviously" parallelizable
 * task across multiple CPUs. Callers initialize the pwork control object with
 * a desired level of parallelization and a work function. Next, they embed
 * struct xfs_pwork in whatever structure they use to pass work context to a
 * worker thread and queue that pwork. The work function will be passed the
 * pwork item when it is run (from process context) and any returned error will
 * be recorded in xfs_pwork_ctl.error. Work functions should check for errors
 * and abort if necessary; the non-zeroness of xfs_pwork_ctl.error does not
 * stop workqueue item processing.
 *
 * This is the rough equivalent of the xfsprogs workqueue code, though we can't
 * reuse that name here.
 */
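
/*
 * Illustrative usage sketch of the embedding described above. The foo_*
 * names are hypothetical and not part of this file; xfs_pwork_want_abort()
 * is the abort-check helper declared in xfs_pwork.h, and the work function
 * owns (and here frees) its containing structure:
 *
 *	struct foo_work {
 *		struct xfs_pwork	pwork;
 *		xfs_agnumber_t		agno;
 *	};
 *
 *	static int
 *	foo_work_fn(
 *		struct xfs_mount	*mp,
 *		struct xfs_pwork	*pwork)
 *	{
 *		struct foo_work		*fw;
 *		int			error = 0;
 *
 *		fw = container_of(pwork, struct foo_work, pwork);
 *		if (!xfs_pwork_want_abort(pwork))
 *			error = foo_process_ag(mp, fw->agno);
 *		kfree(fw);
 *		return error;
 *	}
 */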

/* Invoke our caller's function. */
static void
xfs_pwork_work(
	struct work_struct	*work)
{
	struct xfs_pwork	*pwork;
	struct xfs_pwork_ctl	*pctl;
	int			error;

	pwork = container_of(work, struct xfs_pwork, work);
	pctl = pwork->pctl;
	error = pctl->work_fn(pctl->mp, pwork);
	if (error && !pctl->error)
		pctl->error = error;
	if (atomic_dec_and_test(&pctl->nr_work))
		wake_up(&pctl->poll_wait);
}

/*
 * Set up control data for parallel work. @work_fn is the function that will
 * be called. @tag is used to construct the workqueue name. @nr_threads is
 * the level of parallelism desired, or 0 for no limit.
 */
int
xfs_pwork_init(
	struct xfs_mount	*mp,
	struct xfs_pwork_ctl	*pctl,
	xfs_pwork_work_fn	work_fn,
	const char		*tag,
	unsigned int		nr_threads)
{
#ifdef DEBUG
	if (xfs_globals.pwork_threads >= 0)
		nr_threads = xfs_globals.pwork_threads;
#endif
	trace_xfs_pwork_init(mp, nr_threads, current->pid);

	pctl->wq = alloc_workqueue("%s-%d", WQ_FREEZABLE, nr_threads, tag,
			current->pid);
	if (!pctl->wq)
		return -ENOMEM;
	pctl->work_fn = work_fn;
	pctl->error = 0;
	pctl->mp = mp;
	atomic_set(&pctl->nr_work, 0);
	init_waitqueue_head(&pctl->poll_wait);

	return 0;
}

/* Queue some parallel work. */
void
xfs_pwork_queue(
	struct xfs_pwork_ctl	*pctl,
	struct xfs_pwork	*pwork)
{
	INIT_WORK(&pwork->work, xfs_pwork_work);
	pwork->pctl = pctl;
	atomic_inc(&pctl->nr_work);
	queue_work(pctl->wq, &pwork->work);
}

/* Wait for the work to finish and tear down the control structure. */
int
xfs_pwork_destroy(
	struct xfs_pwork_ctl	*pctl)
{
	destroy_workqueue(pctl->wq);
	pctl->wq = NULL;
	return pctl->error;
}
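
/*
 * Illustrative driver sequence for the three functions above, continuing the
 * hypothetical foo_* sketch from the top of this file (each queued item is
 * freed by its own work function):
 *
 *	struct xfs_pwork_ctl	pctl;
 *	struct foo_work		*fw;
 *	xfs_agnumber_t		agno;
 *	int			error;
 *
 *	error = xfs_pwork_init(mp, &pctl, foo_work_fn, "foo", 0);
 *	if (error)
 *		return error;
 *	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
 *		fw = kzalloc(sizeof(*fw), GFP_KERNEL);
 *		if (!fw)
 *			break;
 *		fw->agno = agno;
 *		xfs_pwork_queue(&pctl, &fw->pwork);
 *	}
 *	return xfs_pwork_destroy(&pctl);
 *
 * A caller that holds locks (such as mount) can call xfs_pwork_poll(&pctl)
 * before xfs_pwork_destroy() to wait without tripping the soft lockup
 * watchdog.
 */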

/*
 * Wait for the work to finish by polling the completion status and touching
 * the soft lockup watchdog. This is for callers such as mount that hold
 * locks.
 */
void
xfs_pwork_poll(
	struct xfs_pwork_ctl	*pctl)
{
	while (wait_event_timeout(pctl->poll_wait,
				atomic_read(&pctl->nr_work) == 0, HZ) == 0)
		touch_softlockup_watchdog();
}

/*
 * Return the amount of parallelism that the data device can handle, or 0 for
 * no limit.
 */
unsigned int
xfs_pwork_guess_datadev_parallelism(
	struct xfs_mount	*mp)
{
	struct xfs_buftarg	*btp = mp->m_ddev_targp;

	/*
	 * For now we'll go with the most conservative setting possible,
	 * which is two threads for an SSD and one thread everywhere else.
	 */
	return blk_queue_nonrot(btp->bt_bdev->bd_disk->queue) ? 2 : 1;
}