// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/fs-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains all the functions related to writing back and waiting
 * upon dirty inodes against superblocks, and writing back dirty
 * pages against inodes. ie: data writeback. Writeout of the
 * inode itself is not handled here.
 *
 * 10Apr2002	Andrew Morton
 *		Split out of fs/inode.c
 *		Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include <linux/device.h>
#include <linux/memcontrol.h>
#include "internal.h"

/*
 * 4MB minimal write chunk size
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))
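/* e.g. with 4KB pages (PAGE_SHIFT == 12): 4096KB >> 2 == 1024 pages == 4MB */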

/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	unsigned int auto_free:1;	/* free on completion */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct wb_completion *done;	/* set if the caller waits */
};

/*
 * If an inode is constantly having its pages dirtied, but the timestamp
 * updates then stop, the worst case time between when an inode last had
 * its timestamps updated and when they finally get written out can be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not have their timestamps written out for 24 hours.
 */
unsigned int dirtytime_expire_interval = 12 * 60 * 60;
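/*
 * Worked example of that worst case (assuming expiry is driven by a
 * periodic sweep at this interval): a timestamp updated just after a
 * sweep isn't old enough to be picked up by the next sweep about 12
 * hours later, so it is only written out by the sweep after that,
 * roughly 24 hours (two intervals) after the update.
 */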

static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_io_list);
}

/*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure and inline functions so that the definition
 * remains local to this file.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);

static bool wb_io_lists_populated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb)) {
		return false;
	} else {
		set_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(!wb->avg_write_bandwidth);
		atomic_long_add(wb->avg_write_bandwidth,
				&wb->bdi->tot_write_bandwidth);
		return true;
	}
}

static void wb_io_lists_depopulated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) &&
	    list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) {
		clear_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth,
					&wb->bdi->tot_write_bandwidth) < 0);
	}
}

/**
 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
 * @inode: inode to be moved
 * @wb: target bdi_writeback
 * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
 *
 * Move @inode->i_io_list to @head of @wb and set %WB_has_dirty_io.
 * Returns %true if @inode is the first occupant of the !dirty_time IO
 * lists; otherwise, %false.
 */
static bool inode_io_list_move_locked(struct inode *inode,
				      struct bdi_writeback *wb,
				      struct list_head *head)
{
	assert_spin_locked(&wb->list_lock);

	list_move(&inode->i_io_list, head);

	/* dirty_time doesn't count as dirty_io until expiration */
	if (head != &wb->b_dirty_time)
		return wb_io_lists_populated(wb);

	wb_io_lists_depopulated(wb);
	return false;
}

/**
 * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
 * @inode: inode to be removed
 * @wb: bdi_writeback @inode is being removed from
 *
 * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all are empty afterwards.
 */
static void inode_io_list_del_locked(struct inode *inode,
				     struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);

	inode->i_state &= ~I_SYNC_QUEUED;
	list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);
}

static void wb_wakeup(struct bdi_writeback *wb)
{
	spin_lock_bh(&wb->work_lock);
	if (test_bit(WB_registered, &wb->state))
		mod_delayed_work(bdi_wq, &wb->dwork, 0);
	spin_unlock_bh(&wb->work_lock);
}

static void finish_writeback_work(struct bdi_writeback *wb,
				  struct wb_writeback_work *work)
{
	struct wb_completion *done = work->done;

	if (work->auto_free)
		kfree(work);
	if (done) {
		wait_queue_head_t *waitq = done->waitq;

		/* @done can't be accessed after the following dec */
		if (atomic_dec_and_test(&done->cnt))
			wake_up_all(waitq);
	}
}

static void wb_queue_work(struct bdi_writeback *wb,
			  struct wb_writeback_work *work)
{
	trace_writeback_queue(wb, work);

	if (work->done)
		atomic_inc(&work->done->cnt);

	spin_lock_bh(&wb->work_lock);

	if (test_bit(WB_registered, &wb->state)) {
		list_add_tail(&work->list, &wb->work_list);
		mod_delayed_work(bdi_wq, &wb->dwork, 0);
	} else
		finish_writeback_work(wb, work);

	spin_unlock_bh(&wb->work_lock);
}

/**
 * wb_wait_for_completion - wait for completion of bdi_writeback_works
 * @done: target wb_completion
 *
 * Wait for one or more work items whose ->done field was set to @done,
 * which should have been initialized with DEFINE_WB_COMPLETION().  This
 * function returns after all such work items are completed.  Work items
 * which are waited upon aren't freed automatically on completion.
 */
void wb_wait_for_completion(struct wb_completion *done)
{
	atomic_dec(&done->cnt);		/* put down the initial count */
	wait_event(*done->waitq, !atomic_read(&done->cnt));
}
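/*
 * Usage sketch, mirroring callers such as sync_inodes_sb() (the exact
 * DEFINE_WB_COMPLETION() arguments depend on the kernel version):
 *
 *	DEFINE_WB_COMPLETION(done, bdi);
 *	work->done = &done;
 *	wb_queue_work(wb, work);
 *	wb_wait_for_completion(&done);
 *
 * wb_queue_work() takes a reference on @done for each queued work and
 * finish_writeback_work() drops it; the wait above ends once every
 * queued work has completed and the initial count has been put.
 */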

#ifdef CONFIG_CGROUP_WRITEBACK

/*
 * Parameters for foreign inode detection, see wbc_detach_inode() to see
 * how they're used.
 *
 * These parameters are inherently heuristic as the detection target
 * itself is fuzzy.  All we want to do is detach an inode from its
 * current owner if some other cgroup is writing to it too much.
 *
 * The current cgroup writeback is built on the assumption that multiple
 * cgroups writing to the same inode concurrently is very rare and a mode
 * of operation which isn't well supported.  As such, the goal is to
 * switch without taking too long when a different cgroup takes over an
 * inode, while avoiding overly aggressive flip-flops from occasional
 * foreign writes.
 *
 * We record, very roughly, 2s worth of IO time history and if more than
 * half of that is foreign, trigger the switch.  The recording is quantized
 * to 16 slots.  To avoid tiny writes from swinging the decision too much,
 * writes smaller than 1/8 of avg size are ignored.
 */
#define WB_FRN_TIME_SHIFT	13	/* 1s = 2^13, up to 8 secs w/ 16bit */
#define WB_FRN_TIME_AVG_SHIFT	3	/* avg = avg * 7/8 + new * 1/8 */
#define WB_FRN_TIME_CUT_DIV	8	/* ignore rounds < avg / 8 */
#define WB_FRN_TIME_PERIOD	(2 * (1 << WB_FRN_TIME_SHIFT))	/* 2s */

#define WB_FRN_HIST_SLOTS	16	/* inode->i_wb_frn_history is 16bit */
#define WB_FRN_HIST_UNIT	(WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS)
					/* each slot's duration is 2s / 16 */
#define WB_FRN_HIST_THR_SLOTS	(WB_FRN_HIST_SLOTS / 2)
					/* if foreign slots > 8, switch */
#define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
					/* one round can affect up to 5 slots */
#define WB_FRN_MAX_IN_FLIGHT	1024	/* don't queue too many concurrently */
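/*
 * With the values above: WB_FRN_TIME_PERIOD is 2 * 8192 == 16384 (~2s on
 * the 1s == 2^13 fixed-point scale), WB_FRN_HIST_UNIT is 16384 / 16 ==
 * 1024 (~125ms per history slot), a switch needs more than 8 of the 16
 * slots to be foreign, and a single round shifts in at most
 * 8 / 2 + 1 == 5 slots.
 */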

static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;

void __inode_attach_wb(struct inode *inode, struct page *page)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;

	if (inode_cgwb_enabled(inode)) {
		struct cgroup_subsys_state *memcg_css;

		if (page) {
			memcg_css = mem_cgroup_css_from_page(page);
			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
		} else {
			/* must pin memcg_css, see wb_get_create() */
			memcg_css = task_get_css(current, memory_cgrp_id);
			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
			css_put(memcg_css);
		}
	}

	if (!wb)
		wb = &bdi->wb;

	/*
	 * There may be multiple instances of this function racing to
	 * update the same inode.  Use cmpxchg() to tell the winner.
	 */
	if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
		wb_put(wb);
}
EXPORT_SYMBOL_GPL(__inode_attach_wb);

/**
 * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
 * @inode: inode of interest with i_lock held
 *
 * Returns @inode's wb with its list_lock held.  @inode->i_lock must be
 * held on entry and is released on return.  The returned wb is guaranteed
 * to stay @inode's associated wb until its list_lock is released.
 */
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
	__releases(&inode->i_lock)
	__acquires(&wb->list_lock)
{
	while (true) {
		struct bdi_writeback *wb = inode_to_wb(inode);

		/*
		 * inode_to_wb() association is protected by both
		 * @inode->i_lock and @wb->list_lock but list_lock nests
		 * outside i_lock.  Drop i_lock and verify that the
		 * association hasn't changed after acquiring list_lock.
		 */
		wb_get(wb);
		spin_unlock(&inode->i_lock);
		spin_lock(&wb->list_lock);

		/* i_wb may have changed in between, can't use inode_to_wb() */
		if (likely(wb == inode->i_wb)) {
			wb_put(wb);	/* @inode already has ref */
			return wb;
		}

		spin_unlock(&wb->list_lock);
		wb_put(wb);
		cpu_relax();
		spin_lock(&inode->i_lock);
	}
}

/**
 * inode_to_wb_and_lock_list - determine an inode's wb and lock it
 * @inode: inode of interest
 *
 * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
 * on entry.
 */
static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
	__acquires(&wb->list_lock)
{
	spin_lock(&inode->i_lock);
	return locked_inode_to_wb_and_lock_list(inode);
}

struct inode_switch_wbs_context {
	struct inode *inode;
	struct bdi_writeback *new_wb;

	struct rcu_head rcu_head;
	struct work_struct work;
};

static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
{
	down_write(&bdi->wb_switch_rwsem);
}

static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
{
	up_write(&bdi->wb_switch_rwsem);
}

static void inode_switch_wbs_work_fn(struct work_struct *work)
{
	struct inode_switch_wbs_context *isw =
		container_of(work, struct inode_switch_wbs_context, work);
	struct inode *inode = isw->inode;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct address_space *mapping = inode->i_mapping;
	struct bdi_writeback *old_wb = inode->i_wb;
	struct bdi_writeback *new_wb = isw->new_wb;
	XA_STATE(xas, &mapping->i_pages, 0);
	struct page *page;
	bool switched = false;

	/*
	 * If @inode switches cgwb membership while sync_inodes_sb() is
	 * being issued, sync_inodes_sb() might miss it.  Synchronize.
	 */
	down_read(&bdi->wb_switch_rwsem);

	/*
	 * By the time control reaches here, an RCU grace period has passed
	 * since the I_WB_SWITCH assertion and all wb stat update transactions
	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
	 * synchronizing against the i_pages lock.
	 *
	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
	 * gives us exclusion against all wb related operations on @inode
	 * including IO list manipulations and stat updates.
	 */
	if (old_wb < new_wb) {
		spin_lock(&old_wb->list_lock);
		spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&new_wb->list_lock);
		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
	}
	spin_lock(&inode->i_lock);
	xa_lock_irq(&mapping->i_pages);

	/*
	 * Once I_FREEING is visible under i_lock, the eviction path owns
	 * the inode and we shouldn't modify ->i_io_list.
	 */
	if (unlikely(inode->i_state & I_FREEING))
		goto skip_switch;

	trace_inode_switch_wbs(inode, old_wb, new_wb);

	/*
	 * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
	 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
	 * pages actually under writeback.
	 */
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
		if (PageDirty(page)) {
			dec_wb_stat(old_wb, WB_RECLAIMABLE);
			inc_wb_stat(new_wb, WB_RECLAIMABLE);
		}
	}

	xas_set(&xas, 0);
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
		WARN_ON_ONCE(!PageWriteback(page));
		dec_wb_stat(old_wb, WB_WRITEBACK);
		inc_wb_stat(new_wb, WB_WRITEBACK);
	}

	wb_get(new_wb);

	/*
	 * Transfer to @new_wb's IO list if necessary.  The specific list
	 * @inode was on is ignored and the inode is put on ->b_dirty which
	 * is always correct including from ->b_dirty_time.  The transfer
	 * preserves @inode->dirtied_when ordering.
	 */
	if (!list_empty(&inode->i_io_list)) {
		struct inode *pos;

		inode_io_list_del_locked(inode, old_wb);
		inode->i_wb = new_wb;
		list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
			if (time_after_eq(inode->dirtied_when,
					  pos->dirtied_when))
				break;
		inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
	} else {
		inode->i_wb = new_wb;
	}

	/* ->i_wb_frn updates may race wbc_detach_inode() but doesn't matter */
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
	switched = true;
skip_switch:
	/*
	 * Paired with load_acquire in unlocked_inode_to_wb_begin() and
	 * ensures that the new wb is visible if they see !I_WB_SWITCH.
	 */
	smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);

	xa_unlock_irq(&mapping->i_pages);
	spin_unlock(&inode->i_lock);
	spin_unlock(&new_wb->list_lock);
	spin_unlock(&old_wb->list_lock);

	up_read(&bdi->wb_switch_rwsem);

	if (switched) {
		wb_wakeup(new_wb);
		wb_put(old_wb);
	}
	wb_put(new_wb);

	iput(inode);
	kfree(isw);

	atomic_dec(&isw_nr_in_flight);
}

static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
{
	struct inode_switch_wbs_context *isw = container_of(rcu_head,
				struct inode_switch_wbs_context, rcu_head);

	/* needs to grab bh-unsafe locks, bounce to work item */
	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
	queue_work(isw_wq, &isw->work);
}

/**
 * inode_switch_wbs - change the wb association of an inode
 * @inode: target inode
 * @new_wb_id: ID of the new wb
 *
 * Switch @inode's wb association to the wb identified by @new_wb_id.  The
 * switching is performed asynchronously and may fail silently.
 */
static void inode_switch_wbs(struct inode *inode, int new_wb_id)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct cgroup_subsys_state *memcg_css;
	struct inode_switch_wbs_context *isw;

	/* no-op if a switch already seems to be in progress */
	if (inode->i_state & I_WB_SWITCH)
		return;

	/* avoid queueing a new switch if too many are already in flight */
	if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
		return;

	isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
	if (!isw)
		return;

	atomic_inc(&isw_nr_in_flight);

	/* find and pin the new wb */
	rcu_read_lock();
	memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
	if (memcg_css && !css_tryget(memcg_css))
		memcg_css = NULL;
	rcu_read_unlock();
	if (!memcg_css)
		goto out_free;

	isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
	css_put(memcg_css);
	if (!isw->new_wb)
		goto out_free;

	/* while holding I_WB_SWITCH, no one else can update the association */
	spin_lock(&inode->i_lock);
	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
	    inode_to_wb(inode) == isw->new_wb) {
		spin_unlock(&inode->i_lock);
		goto out_free;
	}
	inode->i_state |= I_WB_SWITCH;
	__iget(inode);
	spin_unlock(&inode->i_lock);

	isw->inode = inode;

	/*
	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
	 * the RCU protected stat update paths to grab the i_pages
	 * lock so that stat transfer can synchronize against them.
	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
	 */
	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
	return;

out_free:
	atomic_dec(&isw_nr_in_flight);
	if (isw->new_wb)
		wb_put(isw->new_wb);
	kfree(isw);
}

/**
 * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
 * @wbc: writeback_control of interest
 * @inode: target inode
 *
 * @inode is locked and about to be written back under the control of @wbc.
 * Record @inode's writeback context into @wbc and unlock the i_lock.  On
 * writeback completion, wbc_detach_inode() should be called.  This is used
 * to track the cgroup writeback context.
 */
void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
				 struct inode *inode)
{
	if (!inode_cgwb_enabled(inode)) {
		spin_unlock(&inode->i_lock);
		return;
	}

	wbc->wb = inode_to_wb(inode);
	wbc->inode = inode;

	wbc->wb_id = wbc->wb->memcg_css->id;
	wbc->wb_lcand_id = inode->i_wb_frn_winner;
	wbc->wb_tcand_id = 0;
	wbc->wb_bytes = 0;
	wbc->wb_lcand_bytes = 0;
	wbc->wb_tcand_bytes = 0;

	wb_get(wbc->wb);
	spin_unlock(&inode->i_lock);

	/*
	 * A dying wb indicates that either the blkcg associated with the
	 * memcg changed or the associated memcg is dying.  In the first
	 * case, a replacement wb should already be available and we should
	 * refresh the wb immediately.  In the second case, trying to
	 * refresh will keep failing.
	 */
	if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
		inode_switch_wbs(inode, wbc->wb_id);
}
EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
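/*
 * Typical pairing, sketched from how a writeback path drives the
 * foreign-inode bookkeeping (do_writepages() lives outside this file;
 * the sequence is only illustrative):
 *
 *	spin_lock(&inode->i_lock);
 *	wbc_attach_and_unlock_inode(&wbc, inode);
 *	do_writepages(inode->i_mapping, &wbc);
 *	wbc_detach_inode(&wbc);
 */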

/**
 * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
 * @wbc: writeback_control of the just finished writeback
 *
 * To be called after a writeback attempt of an inode finishes; this undoes
 * wbc_attach_and_unlock_inode().  Can be called from any context.
 *
 * As concurrent write sharing of an inode is expected to be very rare and
 * memcg only tracks page ownership on a first-use basis, severely confining
 * the usefulness of such sharing, cgroup writeback tracks ownership
 * per-inode.  While the support for concurrent write sharing of an inode
 * is deemed unnecessary, an inode being written to by different cgroups at
 * different points in time is a lot more common, and, more importantly,
 * charging only by first-use can too readily lead to grossly incorrect
 * behaviors (a single foreign page can lead to gigabytes of writeback
 * being incorrectly attributed).
 *
 * To resolve this issue, cgroup writeback detects the majority dirtier of
 * an inode and transfers the ownership to it.  To avoid unnecessary
 * oscillation, the detection mechanism keeps track of history and gives
 * out the switch verdict only if the foreign usage pattern is stable over
 * a certain amount of time and/or writeback attempts.
 *
 * On each writeback attempt, @wbc tries to detect the majority writer
 * using the Boyer-Moore majority vote algorithm.  In addition to the byte
 * count from the majority voting, it also counts the bytes written for the
 * current wb and the last round's winner wb (max of last round's current
 * wb, the winner from two rounds ago, and the last round's majority
 * candidate).  Keeping track of the historical winner helps the algorithm
 * to semi-reliably detect the most active writer even when it's not the
 * absolute majority.
 *
 * Once the winner of the round is determined, whether the winner is
 * foreign or not and how much IO time the round consumed is recorded in
 * inode->i_wb_frn_history.  If the amount of recorded foreign IO time is
 * over a certain threshold, the switch verdict is given.
 */
void wbc_detach_inode(struct writeback_control *wbc)
{
	struct bdi_writeback *wb = wbc->wb;
	struct inode *inode = wbc->inode;
	unsigned long avg_time, max_bytes, max_time;
	u16 history;
	int max_id;

	if (!wb)
		return;

	history = inode->i_wb_frn_history;
	avg_time = inode->i_wb_frn_avg_time;

	/* pick the winner of this round */
	if (wbc->wb_bytes >= wbc->wb_lcand_bytes &&
	    wbc->wb_bytes >= wbc->wb_tcand_bytes) {
		max_id = wbc->wb_id;
		max_bytes = wbc->wb_bytes;
	} else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) {
		max_id = wbc->wb_lcand_id;
		max_bytes = wbc->wb_lcand_bytes;
	} else {
		max_id = wbc->wb_tcand_id;
		max_bytes = wbc->wb_tcand_bytes;
	}

	/*
	 * Calculate the amount of IO time the winner consumed and fold it
	 * into the running average kept per inode.  If the consumed IO
	 * time is lower than avg / WB_FRN_TIME_CUT_DIV, ignore it for
	 * deciding whether to switch or not.  This is to prevent one-off
	 * small dirtiers from skewing the verdict.
	 */
	max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT,
				wb->avg_write_bandwidth);
	if (avg_time)
		avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) -
			    (avg_time >> WB_FRN_TIME_AVG_SHIFT);
	else
		avg_time = max_time;	/* immediate catch up on first run */

	if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) {
		int slots;

		/*
		 * The switch verdict is reached if foreign wbs consume
		 * more than a certain proportion of IO time in a
		 * WB_FRN_TIME_PERIOD.  This is loosely tracked by a 16 slot
		 * history mask where each bit represents one sixteenth of
		 * the period.  Determine the number of slots to shift into
		 * history from @max_time.
		 */
		slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT),
			    (unsigned long)WB_FRN_HIST_MAX_SLOTS);
		history <<= slots;
		if (wbc->wb_id != max_id)
			history |= (1U << slots) - 1;
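		/*
		 * e.g. a foreign round that consumed ~3 * WB_FRN_HIST_UNIT of
		 * IO time shifts the history left by 3 and sets the low 3
		 * bits; once more than half of the 16 slots are foreign, the
		 * check below flips the inode to the foreign wb.
		 */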

		if (history)
			trace_inode_foreign_history(inode, wbc, history);

		/*
		 * Switch if the current wb isn't the consistent winner.
		 * If there are multiple closely competing dirtiers, the
		 * inode may switch across them repeatedly over time, which
		 * is okay.  The main goal is avoiding keeping an inode on
		 * the wrong wb for an extended period of time.
		 */
		if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
			inode_switch_wbs(inode, max_id);
	}

	/*
	 * Multiple instances of this function may race to update the
	 * following fields but we don't mind occasional inaccuracies.
	 */
	inode->i_wb_frn_winner = max_id;
	inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX);
	inode->i_wb_frn_history = history;

	wb_put(wbc->wb);
	wbc->wb = NULL;
}
EXPORT_SYMBOL_GPL(wbc_detach_inode);

/**
 * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
 * @wbc: writeback_control of the writeback in progress
 * @page: page being written out
 * @bytes: number of bytes being written out
 *
 * @bytes from @page are about to be written out during the writeback
 * controlled by @wbc.  Keep the book for foreign inode detection.  See
 * wbc_detach_inode().
 */
void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
			      size_t bytes)
{
	struct cgroup_subsys_state *css;
	int id;

	/*
	 * pageout() path doesn't attach @wbc to the inode being written
	 * out.  This is intentional as we don't want the function to block
	 * behind a slow cgroup.  Ultimately, we want pageout() to kick off
	 * regular writeback instead of writing things out itself.
	 */
	if (!wbc->wb || wbc->no_cgroup_owner)
		return;

	css = mem_cgroup_css_from_page(page);
	/* dead cgroups shouldn't contribute to inode ownership arbitration */
	if (!(css->flags & CSS_ONLINE))
		return;

	id = css->id;

	if (id == wbc->wb_id) {
		wbc->wb_bytes += bytes;
		return;
	}

	if (id == wbc->wb_lcand_id)
		wbc->wb_lcand_bytes += bytes;

	/* Boyer-Moore majority vote algorithm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) if (!wbc->wb_tcand_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) wbc->wb_tcand_id = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) if (id == wbc->wb_tcand_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) wbc->wb_tcand_bytes += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
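
/*
 * The candidate tracking above is the classic Boyer-Moore majority vote,
 * weighted by bytes rather than counting elements.  A minimal standalone
 * sketch of the same pass (illustrative only; frn_sample and
 * frn_majority_candidate() are made-up names, not kernel API):
 *
 *	struct frn_sample { int id; size_t bytes; };
 *
 *	static int frn_majority_candidate(const struct frn_sample *s, int n)
 *	{
 *		int cand_id = -1;		// current candidate
 *		size_t cand_bytes = 0;		// its surviving weight
 *		int i;
 *
 *		for (i = 0; i < n; i++) {
 *			if (!cand_bytes)
 *				cand_id = s[i].id;
 *			if (s[i].id == cand_id)
 *				cand_bytes += s[i].bytes;
 *			else
 *				cand_bytes -= min(s[i].bytes, cand_bytes);
 *		}
 *		return cand_id;			// majority owner, if one exists
 *	}
 *
 * If one id accounts for more than half of the total bytes it is guaranteed
 * to be the surviving candidate; wbc_detach_inode() then cross-checks the
 * candidates' byte counts before treating one as the winner.
 */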
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) * inode_congested - test whether an inode is congested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * @inode: inode to test for congestion (may be NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) * @cong_bits: mask of WB_[a]sync_congested bits to test
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) * Tests whether @inode is congested. @cong_bits is the mask of congestion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) * bits to test and the return value is the mask of set bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) * If cgroup writeback is enabled for @inode, the congestion state is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * associated with @inode is congested; otherwise, the root wb's congestion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) * state is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * @inode is allowed to be NULL as this function is often called on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) * mapping->host which is NULL for the swapper space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) int inode_congested(struct inode *inode, int cong_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) * Once set, ->i_wb never becomes NULL while the inode is alive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) * Start transaction iff ->i_wb is visible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) if (inode && inode_to_wb_is_valid(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) struct wb_lock_cookie lock_cookie = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) bool congested;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) congested = wb_congested(wb, cong_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) unlocked_inode_to_wb_end(inode, &lock_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) return congested;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) EXPORT_SYMBOL_GPL(inode_congested);
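
/*
 * A minimal usage sketch, assuming the WB_sync_congested/WB_async_congested
 * bit names from backing-dev-defs.h (my_should_throttle() is hypothetical):
 *
 *	static bool my_should_throttle(struct inode *inode)
 *	{
 *		int mask = (1 << WB_sync_congested) | (1 << WB_async_congested);
 *
 *		return inode_congested(inode, mask) != 0;
 *	}
 *
 * Most callers go through the single-bit inode_read_congested() /
 * inode_write_congested() wrappers in backing-dev.h instead.
 */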
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) * wb_split_bdi_pages - split nr_pages to write according to bandwidth
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) * @wb: target bdi_writeback to split @nr_pages to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) * @nr_pages: number of pages to write for the whole bdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) * Split @wb's portion of @nr_pages according to @wb's write bandwidth in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) * relation to the total write bandwidth of all wb's w/ dirty inodes on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) * @wb->bdi.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) unsigned long this_bw = wb->avg_write_bandwidth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) if (nr_pages == LONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) return LONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) * This may be called on clean wb's and proportional distribution
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) * may not make sense, just use the original @nr_pages in those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) * cases. In general, we want to err on the side of writing more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) if (!tot_bw || this_bw >= tot_bw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) }
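
/*
 * A worked example of the split above: with @nr_pages == 1024, a wb whose
 * avg_write_bandwidth is 50 out of a bdi total of 200 gets
 * DIV_ROUND_UP(1024 * 50, 200) == 256 pages, while a wb holding all of the
 * bandwidth (or a bdi with no recorded bandwidth at all) keeps the full 1024.
 */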
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) * @bdi: target backing_dev_info
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) * @base_work: wb_writeback_work to issue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) * @skip_if_busy: skip wb's which already have writeback in progress
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) * have dirty inodes. If @base_work->nr_pages isn't %LONG_MAX, it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) * distributed to the busy wbs according to each wb's proportion in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * total active write bandwidth of @bdi.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) struct wb_writeback_work *base_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) bool skip_if_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) struct bdi_writeback *last_wb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) struct bdi_writeback *wb = list_entry(&bdi->wb_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) struct bdi_writeback, bdi_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) DEFINE_WB_COMPLETION(fallback_work_done, bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) struct wb_writeback_work fallback_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) struct wb_writeback_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) long nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) if (last_wb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) wb_put(last_wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) last_wb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) /* SYNC_ALL writes out I_DIRTY_TIME too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (!wb_has_dirty_io(wb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) (base_work->sync_mode == WB_SYNC_NONE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) list_empty(&wb->b_dirty_time)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) if (skip_if_busy && writeback_in_progress(wb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) work = kmalloc(sizeof(*work), GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) if (work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) *work = *base_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) work->nr_pages = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) work->auto_free = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) wb_queue_work(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) /* alloc failed, execute synchronously using on-stack fallback */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) work = &fallback_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) *work = *base_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) work->nr_pages = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) work->auto_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) work->done = &fallback_work_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) wb_queue_work(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) * Pin @wb so that it stays on @bdi->wb_list. This allows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) * continuing iteration from @wb after dropping and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) * regrabbing the rcu read lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) wb_get(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) last_wb = wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) wb_wait_for_completion(&fallback_work_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) if (last_wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) wb_put(last_wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) }
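
/*
 * The pin-drop-restart dance above is a general pattern for sleeping in the
 * middle of an RCU-protected list walk.  A condensed sketch with a generic
 * refcounted node (node_get/node_put/node_needs_wait/node_wait() are
 * hypothetical helpers; the scheme assumes a node is only unlinked once its
 * last reference is dropped, as is the case for bdi_writeback):
 *
 *	struct node *pos = list_entry(head, struct node, link);	// start anchor
 *	struct node *last = NULL;
 *
 *  restart:
 *	rcu_read_lock();
 *	list_for_each_entry_continue_rcu(pos, head, link) {
 *		if (last) {
 *			node_put(last);			// release previous pin
 *			last = NULL;
 *		}
 *		if (!node_needs_wait(pos))
 *			continue;
 *		node_get(pos);				// pin: keeps @pos on the list
 *		last = pos;
 *		rcu_read_unlock();
 *		node_wait(pos);				// may sleep
 *		goto restart;				// resume after @pos
 *	}
 *	rcu_read_unlock();
 *	if (last)
 *		node_put(last);
 */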
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) * @bdi_id: target bdi id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * @memcg_id: target memcg css id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) * @nr: number of pages to write, 0 for best-effort dirty flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) * @reason: reason why some writeback work was initiated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) * @done: target wb_completion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) * with the specified parameters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) enum wb_reason reason, struct wb_completion *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) struct backing_dev_info *bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) struct cgroup_subsys_state *memcg_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) struct wb_writeback_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) /* lookup bdi and memcg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) bdi = bdi_get_by_id(bdi_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (!bdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) if (memcg_css && !css_tryget(memcg_css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) memcg_css = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) if (!memcg_css) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) goto out_bdi_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) * And find the associated wb. If the wb isn't there already,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) * there's nothing to flush; don't create one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) wb = wb_get_lookup(bdi, memcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) if (!wb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) goto out_css_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * If @nr is zero, the caller is attempting to write out most of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) * the currently dirty pages. Let's take the current dirty page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) * count and inflate it by 25% which should be large enough to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) * flush out most dirty pages while avoiding getting livelocked by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) * concurrent dirtiers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) if (!nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) unsigned long filepages, headroom, dirty, writeback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) &writeback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) nr = dirty * 10 / 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) /* issue the writeback work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) if (work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) work->nr_pages = nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) work->sync_mode = WB_SYNC_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) work->range_cyclic = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) work->reason = reason;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) work->done = done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) work->auto_free = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) wb_queue_work(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) wb_put(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) out_css_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) css_put(memcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) out_bdi_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) bdi_put(bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) }
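
/*
 * A minimal usage sketch (bdi_id/memcg_css_id are whatever IDs the caller
 * has recorded, e.g. from a foreign-write notification; passing a NULL
 * completion makes the call fire-and-forget, and WB_REASON_FOREIGN_FLUSH is
 * the reason the memcg foreign-flush path uses):
 *
 *	ret = cgroup_writeback_by_id(bdi_id, memcg_css_id, 0,
 *				     WB_REASON_FOREIGN_FLUSH, NULL);
 *
 * nr == 0 requests the best-effort "current dirty count + 25%" behaviour
 * implemented above.
 */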
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) * cgroup_writeback_umount - flush inode wb switches for umount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) * This function is called when a super_block is about to be destroyed and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) * flushes in-flight inode wb switches. An inode wb switch goes through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * RCU and then workqueue, so the two need to be flushed in order to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * that all previously scheduled switches are finished. As wb switches are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) * rare occurrences and synchronize_rcu() can take a while, perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * flushing iff wb switches are in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) void cgroup_writeback_umount(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) if (atomic_read(&isw_nr_in_flight)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) * Use rcu_barrier() to wait for all pending callbacks to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) * ensure that all in-flight wb switches are in the workqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) rcu_barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) flush_workqueue(isw_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) }
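
/*
 * The two flushes matter because each switch travels through two async
 * stages.  Roughly (isw_rcu_cb() stands in for the actual RCU callback used
 * by the switching code):
 *
 *	// schedule side
 *	atomic_inc(&isw_nr_in_flight);
 *	call_rcu(&isw->rcu_head, isw_rcu_cb);	// callback queues work on isw_wq
 *
 *	// drain side (this function)
 *	rcu_barrier();				// all pending RCU callbacks have run,
 *						// so every switch is now on isw_wq
 *	flush_workqueue(isw_wq);		// ...and has finished executing
 */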
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) static int __init cgroup_writeback_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) if (!isw_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) fs_initcall(cgroup_writeback_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) #else /* CONFIG_CGROUP_WRITEBACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) static struct bdi_writeback *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) locked_inode_to_wb_and_lock_list(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) __releases(&inode->i_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) __acquires(&wb->list_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) struct bdi_writeback *wb = inode_to_wb(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) return wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) __acquires(&wb->list_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) struct bdi_writeback *wb = inode_to_wb(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) return wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) struct wb_writeback_work *base_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) bool skip_if_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) base_work->auto_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) wb_queue_work(&bdi->wb, base_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) #endif /* CONFIG_CGROUP_WRITEBACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) * Add in the number of potentially dirty inodes, because each inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) * write can dirty pagecache in the underlying blockdev.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) static unsigned long get_nr_dirty_pages(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) return global_node_page_state(NR_FILE_DIRTY) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) get_nr_dirty_inodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) if (!wb_has_dirty_io(wb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) * All callers of this function want to start writeback of all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) * dirty pages. Places like vmscan can call this at a very
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * high frequency, causing pointless allocations of tons of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * work items and keeping the flusher threads busy retrieving
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * that work. Ensure that we only allow one of them pending and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) * inflight at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (test_bit(WB_start_all, &wb->state) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) test_and_set_bit(WB_start_all, &wb->state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) wb->start_all_reason = reason;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) wb_wakeup(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
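
/*
 * The double test above is a common trick: the plain test_bit() read keeps
 * the hot path from bouncing the cacheline with an atomic RMW when the bit
 * is already set; callers bail out either on the cheap read or on losing
 * the subsequent test_and_set_bit() race.  As a generic helper it would
 * look like (claim_once() is a made-up name):
 *
 *	static bool claim_once(unsigned long *flags, int bit)
 *	{
 *		if (test_bit(bit, flags))
 *			return false;			// already claimed, no atomic needed
 *		return !test_and_set_bit(bit, flags);	// true iff we set it first
 *	}
 */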
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * wb_start_background_writeback - start background writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * @wb: bdi_writeback to write from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) * Description:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) * This makes sure WB_SYNC_NONE background writeback happens. When
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) * this function returns, it is only guaranteed that for the given wb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) * some IO is happening if we are over the background dirty threshold.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) * The caller need not hold the sb s_umount semaphore.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) void wb_start_background_writeback(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) * We just wake up the flusher thread. It will perform background
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) * writeback as soon as there is no other work to do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) trace_writeback_wake_background(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) wb_wakeup(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
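
/*
 * A plausible call-site sketch, roughly how the dirty throttling path
 * drives it (wb_over_bg_thresh() is the existing helper declared in
 * linux/writeback.h):
 *
 *	if (wb_over_bg_thresh(wb))
 *		wb_start_background_writeback(wb);
 */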
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) * Remove the inode from the writeback list it is on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) void inode_io_list_del(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) wb = inode_to_wb_and_lock_list(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) inode_io_list_del_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) EXPORT_SYMBOL(inode_io_list_del);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) * mark an inode as under writeback on the sb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) void sb_mark_inode_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) if (list_empty(&inode->i_wb_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) if (list_empty(&inode->i_wb_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) trace_sb_mark_inode_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) * clear an inode as under writeback on the sb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) void sb_clear_inode_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) if (!list_empty(&inode->i_wb_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) if (!list_empty(&inode->i_wb_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) list_del_init(&inode->i_wb_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) trace_sb_clear_inode_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) }
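
/*
 * Both helpers above share the same check-lock-recheck shape: an unlocked
 * list_empty() fast path, then a second test under s_inode_wblist_lock
 * before actually touching the list.  In generic form (item/lock/the_list
 * are placeholders):
 *
 *	if (list_empty(&item->link)) {			// unlocked hint only
 *		spin_lock_irqsave(&lock, flags);
 *		if (list_empty(&item->link))		// recheck: may have raced
 *			list_add_tail(&item->link, &the_list);
 *		spin_unlock_irqrestore(&lock, flags);
 *	}
 *
 * The unlocked test is only an optimization; correctness comes from the
 * re-test under the lock.
 */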
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) * furthest end of its superblock's dirty-inode list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) * Before stamping the inode's ->dirtied_when, we check to see whether it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) * already the most-recently-dirtied inode on the b_dirty list. If that is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) * the case then the inode must have been redirtied while it was being written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) * out and we don't reset its dirtied_when.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) assert_spin_locked(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) if (!list_empty(&wb->b_dirty)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) struct inode *tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) tail = wb_inode(wb->b_dirty.next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) if (time_before(inode->dirtied_when, tail->dirtied_when))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) inode->dirtied_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) inode_io_list_move_locked(inode, wb, &wb->b_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) inode->i_state &= ~I_SYNC_QUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) * requeue inode for re-scanning after bdi->b_io list is exhausted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) inode_io_list_move_locked(inode, wb, &wb->b_more_io);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) static void inode_sync_complete(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) inode->i_state &= ~I_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) /* If the inode is clean and unused, put it into the LRU now... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) inode_add_lru(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) /* Waiters must see I_SYNC cleared before being woken up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) wake_up_bit(&inode->i_state, __I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) static bool inode_dirtied_after(struct inode *inode, unsigned long t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) bool ret = time_after(inode->dirtied_when, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) #ifndef CONFIG_64BIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * For inodes being constantly redirtied, dirtied_when can get stuck.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) * It _appears_ to be in the future, but is actually in distant past.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) * This test is necessary to prevent such wrapped-around relative times
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) * from permanently stopping the whole bdi writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) ret = ret && time_before_eq(inode->dirtied_when, jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) }
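
/*
 * A 32-bit worked example of the wrap the comment above guards against
 * (pure jiffies arithmetic, long == s32 on 32-bit):
 *
 *	u32 now     = 1000;
 *	u32 dirtied = now - (0x80000000 + 1);	// stamped just over half a wrap ago
 *
 *	// time_after(dirtied, now)     == ((s32)(now - dirtied) < 0)  == true
 *	//	-> the ancient stamp "looks" like the future
 *	// time_before_eq(dirtied, now) == ((s32)(now - dirtied) >= 0) == false
 *	//	-> the extra check above rejects it and writeback proceeds
 */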
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) #define EXPIRE_DIRTY_ATIME 0x0001
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) * Move expired (dirtied before dirtied_before) dirty inodes from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) * @delaying_queue to @dispatch_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static int move_expired_inodes(struct list_head *delaying_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) struct list_head *dispatch_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) unsigned long dirtied_before)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) LIST_HEAD(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) struct list_head *pos, *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) struct super_block *sb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) int do_sb_sort = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) int moved = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) while (!list_empty(delaying_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) inode = wb_inode(delaying_queue->prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) if (inode_dirtied_after(inode, dirtied_before))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) list_move(&inode->i_io_list, &tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) moved++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) inode->i_state |= I_SYNC_QUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) if (sb_is_blkdev_sb(inode->i_sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) if (sb && sb != inode->i_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) do_sb_sort = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) /* just one sb in list, splice to dispatch_queue and we're done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) if (!do_sb_sort) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) list_splice(&tmp, dispatch_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) /* Move inodes from one superblock together */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) while (!list_empty(&tmp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) sb = wb_inode(tmp.prev)->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) list_for_each_prev_safe(pos, node, &tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) inode = wb_inode(pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) if (inode->i_sb == sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) list_move(&inode->i_io_list, dispatch_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) return moved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) }
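
/*
 * The second loop above is a small in-place "group by superblock": repeatedly
 * take the key of the oldest remaining entry and peel off everything with
 * the same key so that entries sharing a key end up adjacent on the output
 * list.  The same shape with a generic key (struct item is illustrative):
 *
 *	struct item { struct list_head link; int key; };
 *
 *	static void group_by_key(struct list_head *tmp, struct list_head *out)
 *	{
 *		struct list_head *pos, *n;
 *		int key;
 *
 *		while (!list_empty(tmp)) {
 *			key = list_entry(tmp->prev, struct item, link)->key;
 *			list_for_each_prev_safe(pos, n, tmp) {
 *				if (list_entry(pos, struct item, link)->key == key)
 *					list_move(pos, out);
 *			}
 *		}
 *	}
 *
 * The cost is O(entries * distinct keys), which is fine here since a wb
 * rarely carries inodes from many superblocks at once.
 */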
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) * Queue all expired dirty inodes for io, eldest first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) * Before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) * newly dirtied b_dirty b_io b_more_io
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) * =============> gf edc BA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) * After
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) * newly dirtied b_dirty b_io b_more_io
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) * =============> g fBAedc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) * +--> dequeue for IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) unsigned long dirtied_before)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) int moved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) unsigned long time_expire_jif = dirtied_before;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) assert_spin_locked(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) list_splice_init(&wb->b_more_io, &wb->b_io);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) if (!work->for_sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) time_expire_jif);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (moved)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) wb_io_lists_populated(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) trace_writeback_queue_io(wb, work, dirtied_before, moved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) static int write_inode(struct inode *inode, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) trace_writeback_write_inode_start(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) ret = inode->i_sb->s_op->write_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) trace_writeback_write_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) * Wait for writeback on an inode to complete. Called with i_lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) * Caller must make sure inode cannot go away when we drop i_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) static void __inode_wait_for_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) __releases(inode->i_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) __acquires(inode->i_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) wait_queue_head_t *wqh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) while (inode->i_state & I_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) __wait_on_bit(wqh, &wq, bit_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) * Wait for writeback on an inode to complete. Caller must have inode pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) void inode_wait_for_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) __inode_wait_for_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) * Sleep until I_SYNC is cleared. This function must be called with i_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) * held and drops it. It is aimed for callers not holding any inode reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) * so once i_lock is dropped, inode can go away.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) static void inode_sleep_on_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) __releases(inode->i_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) int sleep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) sleep = inode->i_state & I_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) if (sleep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) finish_wait(wqh, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) * Find proper writeback list for the inode depending on its current state and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) * possibly also change of its state while we were doing writeback. Here we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) * handle things such as livelock prevention or fairness of writeback among
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) * inodes. This function can be called only by the flusher thread - no one else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) * processes all inodes in the writeback lists, and requeueing inodes behind the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) * flusher thread's back can have unexpected consequences.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) if (inode->i_state & I_FREEING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * Sync livelock prevention. Each inode is tagged and synced in one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) * shot. If still dirty, it will be redirty_tail()'ed below. Update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) * the dirty time to prevent it from being enqueued and synced again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) if ((inode->i_state & I_DIRTY) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) inode->dirtied_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) if (wbc->pages_skipped) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) * writeback is not making progress due to locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) * buffers. Skip this inode for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) * We didn't write back all the pages. nfs_writepages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) * sometimes bails out without doing anything.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) if (wbc->nr_to_write <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) /* Slice used up. Queue for next turn. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) requeue_io(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) * Writeback blocked by something other than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) * congestion. Delay the inode for some time to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) * avoid spinning on the CPU (100% iowait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) * retrying writeback of the dirty page/inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) * that cannot be performed immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) } else if (inode->i_state & I_DIRTY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) * Filesystems can dirty the inode during writeback operations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) * such as delayed allocation during submission or metadata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) * updates after data IO completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) } else if (inode->i_state & I_DIRTY_TIME) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) inode->dirtied_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) inode->i_state &= ~I_SYNC_QUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) /* The inode is clean. Remove from writeback lists. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) inode_io_list_del_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) * Write out an inode and its dirty pages. Do not update the writeback list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * linkage. That is left to the caller. The caller is also responsible for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * setting I_SYNC flag and calling inode_sync_complete() to clear it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) long nr_to_write = wbc->nr_to_write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) unsigned dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) WARN_ON(!(inode->i_state & I_SYNC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) trace_writeback_single_inode_start(inode, wbc, nr_to_write);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) ret = do_writepages(mapping, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) * Make sure to wait on the data before writing out the metadata.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) * This is important for filesystems that modify metadata on data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) * I/O completion. We don't do it for sync(2) writeback because it has a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) * separate, external IO completion path and ->sync_fs for guaranteeing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) * inode metadata is written back correctly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) int err = filemap_fdatawait(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) * If the inode has dirty timestamps and we need to write them, call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) * mark_inode_dirty_sync() to notify the filesystem about it and to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * change I_DIRTY_TIME into I_DIRTY_SYNC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) if ((inode->i_state & I_DIRTY_TIME) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) time_after(jiffies, inode->dirtied_time_when +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) dirtytime_expire_interval * HZ))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) trace_writeback_lazytime(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) mark_inode_dirty_sync(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) * Some filesystems may redirty the inode during the writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) * due to delalloc, clear dirty metadata flags right before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) * write_inode()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) dirty = inode->i_state & I_DIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) inode->i_state &= ~dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) * Paired with smp_mb() in __mark_inode_dirty(). This allows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) * __mark_inode_dirty() to test i_state without grabbing i_lock -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) * either they see the I_DIRTY bits cleared or we see the dirtied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) * inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) * I_DIRTY_PAGES is always cleared together above even if @mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) * still has dirty pages. The flag is reinstated after smp_mb() if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) * necessary. This guarantees that either __mark_inode_dirty()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) smp_mb();
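
/*
 * Schematically, the I_DIRTY clearing above and __mark_inode_dirty() form
 * the usual store-buffering barrier pairing:
 *
 *	writeback (here)                    dirtier (__mark_inode_dirty())
 *	--------------------------          -------------------------------
 *	i_state &= ~I_DIRTY;                dirty a page (PAGECACHE_TAG_DIRTY)
 *	smp_mb();                           smp_mb();
 *	check PAGECACHE_TAG_DIRTY           check i_state
 *
 * With both barriers in place at least one side observes the other's store,
 * so a freshly dirtied page cannot be missed by both: either I_DIRTY_PAGES
 * is reinstated below or __mark_inode_dirty() sees the cleared flags and
 * redirties the inode itself.
 */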
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) inode->i_state |= I_DIRTY_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) /* Don't write the inode if only I_DIRTY_PAGES was set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) if (dirty & ~I_DIRTY_PAGES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) int err = write_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) trace_writeback_single_inode(inode, wbc, nr_to_write);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) }
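/*
 * A worked example of the I_DIRTY_TIME expiry test above (the values are the
 * usual defaults, not a guarantee): dirtytime_expire_interval is in seconds,
 * hence the "* HZ". With the typical 12-hour default of 43200 seconds the
 * check is effectively
 *
 *	time_after(jiffies, inode->dirtied_time_when + 43200 * HZ)
 *
 * at which point mark_inode_dirty_sync() turns I_DIRTY_TIME into I_DIRTY_SYNC
 * so the timestamps finally reach disk.
 */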
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) * Write out an inode's dirty pages. Either the caller has an active reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) * on the inode or the inode has I_WILL_FREE set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * This function is designed for writing back a single inode, e.g. on behalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) * of a filesystem. The flusher thread uses __writeback_single_inode() instead
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * and does more thorough writeback list handling in writeback_sb_inodes().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) static int writeback_single_inode(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) if (!atomic_read(&inode->i_count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) WARN_ON(inode->i_state & I_WILL_FREE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) if (inode->i_state & I_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) if (wbc->sync_mode != WB_SYNC_ALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) * It's a data-integrity sync. We must wait. Since callers hold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) * inode reference or inode has I_WILL_FREE set, it cannot go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) * away under us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) __inode_wait_for_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) WARN_ON(inode->i_state & I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) * Skip inode if it is clean and we have no outstanding writeback in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) * function since the flusher thread may, for example, be doing a sync in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) * parallel, and if we move the inode, it could get skipped. So here we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) * make sure the inode is on some writeback list and leave it there unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) * we have completely cleaned the inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) if (!(inode->i_state & I_DIRTY_ALL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) (wbc->sync_mode != WB_SYNC_ALL ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) inode->i_state |= I_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) wbc_attach_and_unlock_inode(wbc, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) ret = __writeback_single_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) wbc_detach_inode(wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) wb = inode_to_wb_and_lock_list(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) * If inode is clean, remove it from writeback lists. Otherwise don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) * touch it. See comment above for explanation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) if (!(inode->i_state & I_DIRTY_ALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) inode_io_list_del_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) inode_sync_complete(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) }
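/*
 * A minimal sketch of how a caller is expected to drive writeback_single_inode()
 * for data integrity (modelled loosely on helpers such as write_inode_now();
 * the exact field values here are illustrative):
 *
 *	struct writeback_control wbc = {
 *		.nr_to_write	= LONG_MAX,
 *		.sync_mode	= WB_SYNC_ALL,
 *		.range_start	= 0,
 *		.range_end	= LLONG_MAX,
 *	};
 *
 *	err = writeback_single_inode(inode, &wbc);
 */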
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) static long writeback_chunk_size(struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) struct wb_writeback_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) long pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) * here avoids calling into writeback_inodes_wb() more than once.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) * The intended call sequence for WB_SYNC_ALL writeback is:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) * wb_writeback()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) * writeback_sb_inodes() <== called only once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * write_cache_pages() <== called once for each inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * (quickly) tag currently dirty pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * (maybe slowly) sync all tagged pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) pages = LONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) pages = min(wb->avg_write_bandwidth / 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) global_wb_domain.dirty_limit / DIRTY_SCOPE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) pages = min(pages, work->nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) pages = round_down(pages + MIN_WRITEBACK_PAGES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) MIN_WRITEBACK_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) return pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
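/*
 * A rough example of the chunk sizing above, assuming 4KiB pages
 * (PAGE_SHIFT == 12), so that MIN_WRITEBACK_PAGES == 4096 >> 2 == 1024
 * pages, i.e. the 4MB minimum from the top of this file:
 *
 *	pages = min(wb->avg_write_bandwidth / 2,
 *		    global_wb_domain.dirty_limit / DIRTY_SCOPE);
 *	pages = min(pages, work->nr_pages);		// say this yields 1500
 *	pages = round_down(1500 + 1024, 1024);		// == 2048
 *
 * so WB_SYNC_NONE chunks come out as multiples of the 4MB minimum, while
 * WB_SYNC_ALL and tagged_writepages work gets an effectively unlimited chunk.
 */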
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) * Write a portion of b_io inodes which belong to @sb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) * Return the number of pages and/or inodes written.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) * NOTE! This is called with wb->list_lock held, and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) * unlock and relock that for each inode it ends up doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) * IO for.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) static long writeback_sb_inodes(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) struct wb_writeback_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) struct writeback_control wbc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) .sync_mode = work->sync_mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) .tagged_writepages = work->tagged_writepages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) .for_kupdate = work->for_kupdate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) .for_background = work->for_background,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) .for_sync = work->for_sync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) .range_cyclic = work->range_cyclic,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) .range_start = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) .range_end = LLONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) unsigned long start_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) long write_chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) long wrote = 0; /* count both pages and inodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) while (!list_empty(&wb->b_io)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) struct inode *inode = wb_inode(wb->b_io.prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) struct bdi_writeback *tmp_wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) if (inode->i_sb != sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) if (work->sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) * We only want to write back data for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) * superblock; move all inodes not belonging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) * to it back onto the dirty list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) redirty_tail(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) * The inode belongs to a different superblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) * Bounce back to the caller to unpin this and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) * pin the next superblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) * Don't bother with new inodes or inodes being freed; the former
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) * do not need periodic writeout yet, and for the latter writeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) * is handled by the freer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) * If this inode is locked for writeback and we are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) * doing writeback-for-data-integrity, move it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) * b_more_io so that writeback can proceed with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) * other inodes on s_io.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) * We'll have another go at writing back this inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) * when we completed a full scan of b_io.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) requeue_io(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) trace_writeback_sb_inodes_requeue(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) * We already requeued the inode if it had I_SYNC set and we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) * are doing WB_SYNC_NONE writeback. So this catches only the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) * WB_SYNC_ALL case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) if (inode->i_state & I_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) /* Wait for I_SYNC. This function drops i_lock... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) inode_sleep_on_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) /* Inode may be gone, start again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) inode->i_state |= I_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) wbc_attach_and_unlock_inode(&wbc, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) write_chunk = writeback_chunk_size(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) wbc.nr_to_write = write_chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) wbc.pages_skipped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) * We use I_SYNC to pin the inode in memory. While it is set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) * evict_inode() will wait, so the inode cannot be freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) __writeback_single_inode(inode, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) wbc_detach_inode(&wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) work->nr_pages -= write_chunk - wbc.nr_to_write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) wrote += write_chunk - wbc.nr_to_write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) * We're trying to balance between building up a nice
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) * long list of IOs to improve our merge rate, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) * getting those IOs out quickly for anyone throttling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) * in balance_dirty_pages(). cond_resched() doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) * unplug, so get our IOs out the door before we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * give up the CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) blk_flush_plug(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) * Requeue @inode if still dirty. Be careful as @inode may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) * have been switched to another wb in the meantime.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) tmp_wb = inode_to_wb_and_lock_list(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) if (!(inode->i_state & I_DIRTY_ALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) wrote++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) requeue_inode(inode, tmp_wb, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) inode_sync_complete(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) if (unlikely(tmp_wb != wb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) spin_unlock(&tmp_wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) * bail out to wb_writeback() often enough to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) * background threshold and other termination conditions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) if (wrote) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) if (time_is_before_jiffies(start_time + HZ / 10UL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) if (work->nr_pages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) return wrote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) }
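/*
 * Note on the bail-out above: HZ / 10UL is roughly 100ms worth of jiffies,
 * so once something has been written this loop hands control back to
 * wb_writeback() about ten times a second (or sooner, once the page budget
 * in work->nr_pages is used up) to re-check background thresholds and other
 * termination conditions. For example:
 *
 *	time_is_before_jiffies(start_time + HZ / 10UL)	// true once ~100ms passed
 */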
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) static long __writeback_inodes_wb(struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) struct wb_writeback_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) unsigned long start_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) long wrote = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) while (!list_empty(&wb->b_io)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) struct inode *inode = wb_inode(wb->b_io.prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) if (!trylock_super(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) * trylock_super() may fail consistently due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) * s_umount being grabbed by someone else. Don't use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) * requeue_io(), to avoid busy retrying the inode/sb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) redirty_tail(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) wrote += writeback_sb_inodes(sb, wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) up_read(&sb->s_umount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) /* refer to the same tests at the end of writeback_sb_inodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) if (wrote) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) if (time_is_before_jiffies(start_time + HZ / 10UL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) if (work->nr_pages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) /* Leave any unwritten inodes on b_io */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) return wrote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) .nr_pages = nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) .sync_mode = WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) .range_cyclic = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) .reason = reason,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) if (list_empty(&wb->b_io))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) queue_io(wb, &work, jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) __writeback_inodes_wb(wb, &work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) return nr_pages - work.nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
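/*
 * A quick sketch of the accounting above: work.nr_pages starts out as the
 * caller's page budget and is decremented as pages go out, so with e.g.
 * nr_pages == 1024 and 256 pages of budget left afterwards, the function
 * reports 1024 - 256 == 768 pages written.
 */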
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) * Explicit flushing or periodic writeback of "old" data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) * Define "old": the first time one of an inode's pages is dirtied, we mark the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) * dirtying-time in the inode's address_space. So this periodic writeback code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) * just walks the superblock inode list, writing back any inodes which are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) * older than a specific point in time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) * Try to run once per dirty_writeback_interval. But if a writeback event
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * takes longer than a dirty_writeback_interval, then leave a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * one-second gap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) * dirtied_before takes precedence over nr_to_write. So we'll only write back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) * all dirty pages if they are all attached to "old" mappings.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) static long wb_writeback(struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) struct wb_writeback_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) unsigned long wb_start = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) long nr_pages = work->nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) unsigned long dirtied_before = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) long progress;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) * Stop writeback when nr_pages has been consumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) if (work->nr_pages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) * Background writeout and kupdate-style writeback may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) * run forever. Stop them if there is other work to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) * so that e.g. sync can proceed. They'll be restarted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) * after the other works are all done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) if ((work->for_background || work->for_kupdate) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) !list_empty(&wb->work_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) * For background writeout, stop when we are below the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) * background dirty threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) if (work->for_background && !wb_over_bg_thresh(wb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) * Kupdate and background works are special: we want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) * include all inodes that need writing. Livelock avoidance is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) * handled by these works yielding to any other work, so we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) if (work->for_kupdate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) dirtied_before = jiffies -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) msecs_to_jiffies(dirty_expire_interval * 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) } else if (work->for_background)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) dirtied_before = jiffies;
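/*
 * For illustration: dirty_expire_interval is kept in centiseconds (it is
 * typically exposed as /proc/sys/vm/dirty_expire_centisecs), so the "* 10"
 * above converts it to milliseconds. With the common default of 3000
 * centiseconds this becomes
 *
 *	dirtied_before = jiffies - msecs_to_jiffies(30000);
 *
 * i.e. kupdate-style writeback targets inodes dirtied more than ~30s ago.
 */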
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) trace_writeback_start(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) if (list_empty(&wb->b_io))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) queue_io(wb, work, dirtied_before);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) if (work->sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) progress = writeback_sb_inodes(work->sb, wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) progress = __writeback_inodes_wb(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) trace_writeback_written(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) wb_update_bandwidth(wb, wb_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) * Did we write something? Try for more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) * Dirty inodes are moved to b_io for writeback in batches.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) * The completion of the current batch does not necessarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) * mean the overall work is done. So we keep looping as long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) * as we made some progress on cleaning pages or inodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) if (progress)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) * No more inodes for IO, bail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) if (list_empty(&wb->b_more_io))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) * Nothing written. Wait for some inode to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) * become available for writeback. Otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) * we'll just busyloop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) trace_writeback_wait(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) inode = wb_inode(wb->b_more_io.prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) /* This function drops i_lock... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) inode_sleep_on_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) return nr_pages - work->nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) * Return the next wb_writeback_work struct that hasn't been processed yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) struct wb_writeback_work *work = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) spin_lock_bh(&wb->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) if (!list_empty(&wb->work_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) work = list_entry(wb->work_list.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) struct wb_writeback_work, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) list_del_init(&work->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) spin_unlock_bh(&wb->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) return work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) static long wb_check_background_flush(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) if (wb_over_bg_thresh(wb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) .nr_pages = LONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) .sync_mode = WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) .for_background = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) .range_cyclic = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) .reason = WB_REASON_BACKGROUND,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) return wb_writeback(wb, &work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) static long wb_check_old_data_flush(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) unsigned long expired;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) long nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) * When set to zero, disable periodic writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) if (!dirty_writeback_interval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) expired = wb->last_old_flush +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) msecs_to_jiffies(dirty_writeback_interval * 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) if (time_before(jiffies, expired))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) wb->last_old_flush = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) nr_pages = get_nr_dirty_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) if (nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) .nr_pages = nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) .sync_mode = WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) .for_kupdate = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) .range_cyclic = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) .reason = WB_REASON_PERIODIC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) return wb_writeback(wb, &work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) }
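/*
 * For illustration: like dirty_expire_interval, dirty_writeback_interval is
 * kept in centiseconds (typically exposed as
 * /proc/sys/vm/dirty_writeback_centisecs), hence the "* 10" conversion to
 * milliseconds above. With the common default of 500 centiseconds:
 *
 *	expired = wb->last_old_flush + msecs_to_jiffies(5000);
 *
 * so at most one kupdate-style pass is started per wb every ~5 seconds.
 */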
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) static long wb_check_start_all(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) long nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) if (!test_bit(WB_start_all, &wb->state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) nr_pages = get_nr_dirty_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) if (nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) .nr_pages = wb_split_bdi_pages(wb, nr_pages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) .sync_mode = WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) .range_cyclic = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) .reason = wb->start_all_reason,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) nr_pages = wb_writeback(wb, &work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) clear_bit(WB_start_all, &wb->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) * Retrieve work items and do the writeback they describe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) static long wb_do_writeback(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) struct wb_writeback_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) long wrote = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) set_bit(WB_writeback_running, &wb->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) while ((work = get_next_work_item(wb)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) trace_writeback_exec(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) wrote += wb_writeback(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) finish_writeback_work(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) * Check for a flush-everything request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) wrote += wb_check_start_all(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) * Check for periodic writeback, kupdated() style
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) wrote += wb_check_old_data_flush(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) wrote += wb_check_background_flush(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) clear_bit(WB_writeback_running, &wb->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) return wrote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) * Handle writeback of dirty data for the device backed by this bdi. Also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) * reschedules periodically and does kupdated style flushing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) void wb_workfn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) struct bdi_writeback *wb = container_of(to_delayed_work(work),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) struct bdi_writeback, dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) long pages_written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) current->flags |= PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) if (likely(!current_is_workqueue_rescuer() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) !test_bit(WB_registered, &wb->state))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) * The normal path. Keep writing back @wb until its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * work_list is empty. Note that this path is also taken
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) * if @wb is shutting down even when we're running off the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) * rescuer as work_list needs to be drained.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) pages_written = wb_do_writeback(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) trace_writeback_pages_written(pages_written);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) } while (!list_empty(&wb->work_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) * bdi_wq can't get enough workers and we're running off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) * the emergency worker. Don't hog it. Hopefully, 1024 is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) * enough for efficient IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) pages_written = writeback_inodes_wb(wb, 1024,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) WB_REASON_FORKER_THREAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) trace_writeback_pages_written(pages_written);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) if (!list_empty(&wb->work_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) wb_wakeup(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) wb_wakeup_delayed(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) current->flags &= ~PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) * write back the whole world.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) if (!bdi_has_dirty_io(bdi))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) wb_start_writeback(wb, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) __wakeup_flusher_threads_bdi(bdi, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) * Wakeup the flusher threads to start writeback of all currently dirty pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) void wakeup_flusher_threads(enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) struct backing_dev_info *bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) * If we are expecting writeback progress we must submit plugged IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) if (blk_needs_flush_plug(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) blk_schedule_flush_plug(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) __wakeup_flusher_threads_bdi(bdi, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) * Wake up bdi's periodically to make sure dirtytime inodes get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) * written back. We deliberately do *not* check the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) * b_dirtytime list in wb_has_dirty_io(), since this would cause the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) * kernel to be constantly waking up once there are any dirtytime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) * inodes on the system. So instead we define a separate delayed work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) * function which gets called much more rarely. (By default, only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) * once every 12 hours.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) * If there is any other write activity going on in the file system,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) * this function won't be necessary. But if the only thing that has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) * happened on the file system is a dirtytime inode caused by an atime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) * update, we need this infrastructure below to make sure that inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) * eventually gets pushed out to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) static void wakeup_dirtytime_writeback(struct work_struct *w);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) static void wakeup_dirtytime_writeback(struct work_struct *w)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) struct backing_dev_info *bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) if (!list_empty(&wb->b_dirty_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) wb_wakeup(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) static int __init start_dirtytime_writeback(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) __initcall(start_dirtytime_writeback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) int dirtytime_interval_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) void *buffer, size_t *lenp, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) if (ret == 0 && write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) mod_delayed_work(system_wq, &dirtytime_work, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) }
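/*
 * For illustration: dirtytime_expire_interval is in seconds and normally
 * backs the /proc/sys/vm/dirtytime_expire_seconds sysctl, so an update such
 * as
 *
 *	# echo 43200 > /proc/sys/vm/dirtytime_expire_seconds
 *
 * (the usual 12-hour default) lands in the handler above, which also kicks
 * dirtytime_work immediately so the new interval takes effect right away.
 */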
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) * __mark_inode_dirty - internal function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) * @inode: inode to mark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) * @flags: what kind of dirty (e.g. I_DIRTY_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) * Mark an inode as dirty. Callers should use mark_inode_dirty or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) * mark_inode_dirty_sync.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) * Put the inode on the super block's dirty list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) * CAREFUL! We mark it dirty unconditionally, but move it onto the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) * dirty list only if it is hashed or if it refers to a blockdev.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) * If it was not hashed, it will never be added to the dirty list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) * even if it is later hashed, as it will have been marked dirty already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) * In short, make sure you hash any inodes _before_ you start marking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) * them dirty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) * the block-special inode (/dev/hda1) itself. And the ->dirtied_when field of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) * the kernel-internal blockdev inode represents the dirtying time of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) * blockdev's pages. This is why for I_DIRTY_PAGES we always use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) * page->mapping->host, so the page-dirtying time is recorded in the internal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) * blockdev inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) void __mark_inode_dirty(struct inode *inode, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) int dirtytime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) trace_writeback_mark_inode_dirty(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) * Don't do this for I_DIRTY_PAGES - that doesn't actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) * dirty the inode itself
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) trace_writeback_dirty_inode_start(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) if (sb->s_op->dirty_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) sb->s_op->dirty_inode(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) trace_writeback_dirty_inode(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) if (flags & I_DIRTY_INODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) flags &= ~I_DIRTY_TIME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) dirtytime = flags & I_DIRTY_TIME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) * Paired with smp_mb() in __writeback_single_inode() for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) * following lockless i_state test. See there for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) if (((inode->i_state & flags) == flags) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) (dirtytime && (inode->i_state & I_DIRTY_INODE)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) if (dirtytime && (inode->i_state & I_DIRTY_INODE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) goto out_unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) if ((inode->i_state & flags) != flags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) const int was_dirty = inode->i_state & I_DIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) inode_attach_wb(inode, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) if (flags & I_DIRTY_INODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) inode->i_state &= ~I_DIRTY_TIME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) inode->i_state |= flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) * If the inode is queued for writeback by the flush worker, just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) * update its dirty state. Once the flush worker is done with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) * the inode it will place it on the appropriate superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) * list, based upon its state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) if (inode->i_state & I_SYNC_QUEUED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) goto out_unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) * Only add valid (hashed) inodes to the superblock's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) * dirty list. Add blockdev inodes as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) if (!S_ISBLK(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) if (inode_unhashed(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) goto out_unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) if (inode->i_state & I_FREEING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) goto out_unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) * If the inode was already on b_dirty/b_io/b_more_io, don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) * reposition it (that would break b_dirty time-ordering).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) if (!was_dirty) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) struct list_head *dirty_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) bool wakeup_bdi = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) wb = locked_inode_to_wb_and_lock_list(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) !test_bit(WB_registered, &wb->state),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) inode->dirtied_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) if (dirtytime)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) inode->dirtied_time_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (inode->i_state & I_DIRTY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) dirty_list = &wb->b_dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) dirty_list = &wb->b_dirty_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) wakeup_bdi = inode_io_list_move_locked(inode, wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) dirty_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) trace_writeback_dirty_inode_enqueue(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) * If this is the first dirty inode for this bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) * we have to wake up the corresponding bdi thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) * to make sure background write-back happens
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) * later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) if (wakeup_bdi &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) wb_wakeup_delayed(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) out_unlock_inode:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) EXPORT_SYMBOL_NS(__mark_inode_dirty, ANDROID_GKI_VFS_EXPORT_ONLY);
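
/*
 * Illustrative sketch, not from the kernel tree: a filesystem that changes
 * in-core inode state is expected to follow up with __mark_inode_dirty(),
 * usually via the mark_inode_dirty() or mark_inode_dirty_sync() wrappers, so
 * the inode lands on its wb's dirty list and is later picked up by the flush
 * worker. The helper name below is hypothetical:
 *
 *	static void example_touch_mtime(struct inode *inode)
 *	{
 *		inode->i_mtime = current_time(inode);
 *		__mark_inode_dirty(inode, I_DIRTY_SYNC);
 *	}
 */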
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) * The @s_sync_lock is used to serialise concurrent sync operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) * Concurrent callers will block on the s_sync_lock rather than doing contending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) * walks. The queueing maintains the behaviour required by sync(2): all the IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) * that has been issued up to the time this function is entered is guaranteed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) * be completed by the time we have gained the lock and waited for all IO that is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) * in progress, regardless of the order in which callers are granted the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) static void wait_sb_inodes(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) LIST_HEAD(sync_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) * We need to be protected against the filesystem going from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) * r/o to r/w or vice versa.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) WARN_ON(!rwsem_is_locked(&sb->s_umount));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) mutex_lock(&sb->s_sync_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) * Splice the writeback list onto a temporary list to avoid waiting on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) * inodes that have started writeback after this point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) * Use rcu_read_lock() to keep the inodes around until we have a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) * the local list because inodes can be dropped from either by writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) * completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) spin_lock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) list_splice_init(&sb->s_inodes_wb, &sync_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) * Data integrity sync. Must wait for all pages under writeback, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) * there may have been pages dirtied before our sync call whose writeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) * started before we got here. In that case the inode may no longer be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) * on the dirty list, but we still have to wait for that writeout to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) * complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) while (!list_empty(&sync_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) struct inode *inode = list_first_entry(&sync_list, struct inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) i_wb_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) * Move each inode back to the wb list before we drop the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) * to preserve consistency between i_wb_list and the mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) * writeback tag. Writeback completion is responsible for removing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) * the inode from either list once the writeback tag is cleared.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) * The mapping can appear untagged while still on-list since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) * do not have the mapping lock. Skip it here; writeback completion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) * will remove it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) spin_unlock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) spin_lock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) __iget(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) * We keep the error status of each individual mapping so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) * applications can catch the writeback error using fsync(2).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) * See filemap_fdatawait_keep_errors() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) filemap_fdatawait_keep_errors(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) spin_lock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) spin_unlock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) mutex_unlock(&sb->s_sync_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) }
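
/*
 * Illustrative sketch, generic and hypothetical: wait_sb_inodes() above uses
 * a common pattern for walking a shared list when the per-entry work can
 * sleep: splice the list onto a private head under the lock, then for each
 * entry requeue it, pin it, drop the lock for the blocking work, and re-take
 * the lock before the next iteration. In outline:
 *
 *	spin_lock_irq(&shared_lock);
 *	list_splice_init(&shared_list, &local_list);
 *	while (!list_empty(&local_list)) {
 *		item = list_first_entry(&local_list, struct item, node);
 *		list_move_tail(&item->node, &shared_list);
 *		if (!item_get(item))
 *			continue;
 *		spin_unlock_irq(&shared_lock);
 *		do_blocking_work(item);
 *		item_put(item);
 *		spin_lock_irq(&shared_lock);
 *	}
 *	spin_unlock_irq(&shared_lock);
 */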
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) enum wb_reason reason, bool skip_if_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) struct backing_dev_info *bdi = sb->s_bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) DEFINE_WB_COMPLETION(done, bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) .sb = sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) .sync_mode = WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) .tagged_writepages = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) .done = &done,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) .nr_pages = nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) .reason = reason,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) WARN_ON(!rwsem_is_locked(&sb->s_umount));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) wb_wait_for_completion(&done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) * @sb: the superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) * @nr: the number of pages to write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) * @reason: reason why some writeback work was initiated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) * Start writeback on some inodes on this super_block. No guarantees are made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) * on how many (if any) will be written, and this function does not wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) * for the submitted IO to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) void writeback_inodes_sb_nr(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) unsigned long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) __writeback_inodes_sb_nr(sb, nr, reason, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) EXPORT_SYMBOL(writeback_inodes_sb_nr);
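
/*
 * Illustrative sketch with a hypothetical helper: a filesystem wanting to
 * push out roughly @nr pages of dirty data without waiting for completion,
 * e.g. to release delalloc reservations, could call this under s_umount,
 * which is what the WARN_ON in __writeback_inodes_sb_nr() expects:
 *
 *	static void example_kick_writeback(struct super_block *sb,
 *					   unsigned long nr)
 *	{
 *		down_read(&sb->s_umount);
 *		writeback_inodes_sb_nr(sb, nr, WB_REASON_FS_FREE_SPACE);
 *		up_read(&sb->s_umount);
 *	}
 */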
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) * writeback_inodes_sb - writeback dirty inodes from given super_block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) * @sb: the superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) * @reason: reason why some writeback work was initiated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) * Start writeback on some inodes on this super_block. No guarantees are made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) * on how many (if any) will be written, and this function does not wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) * for the submitted IO to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) EXPORT_SYMBOL(writeback_inodes_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) * try_to_writeback_inodes_sb - try to start writeback if none underway
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) * @sb: the superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) * @reason: reason why some writeback work was initiated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) * Invoke __writeback_inodes_sb_nr if no writeback is currently underway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) if (!down_read_trylock(&sb->s_umount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) __writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) up_read(&sb->s_umount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) EXPORT_SYMBOL_NS(try_to_writeback_inodes_sb, ANDROID_GKI_VFS_EXPORT_ONLY);
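
/*
 * Illustrative sketch: unlike writeback_inodes_sb(), this variant takes
 * s_umount itself with a trylock and skips wbs that already have writeback
 * in progress, so it can be called opportunistically, for example when a
 * filesystem runs low on free space (low_on_space() below is hypothetical):
 *
 *	if (low_on_space(sb))
 *		try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
 */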
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) * sync_inodes_sb - sync sb inode pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) * @sb: the superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) * This function writes and waits on any dirty inode belonging to this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) * super_block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) void sync_inodes_sb(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) struct backing_dev_info *bdi = sb->s_bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) DEFINE_WB_COMPLETION(done, bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) .sb = sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) .sync_mode = WB_SYNC_ALL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) .nr_pages = LONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) .range_cyclic = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) .done = &done,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) .reason = WB_REASON_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) .for_sync = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) * Can't skip on !bdi_has_dirty(): we still need to wait for clean inodes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) * that are under writeback, and I_DIRTY_TIME inodes, which bdi_has_dirty()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) * ignores, need to be written out too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) if (bdi == &noop_backing_dev_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) WARN_ON(!rwsem_is_locked(&sb->s_umount));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) /* protect against inode wb switch, see inode_switch_wbs_work_fn() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) bdi_down_write_wb_switch_rwsem(bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) bdi_split_work_to_wbs(bdi, &work, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) wb_wait_for_completion(&done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) bdi_up_write_wb_switch_rwsem(bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) wait_sb_inodes(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) EXPORT_SYMBOL(sync_inodes_sb);
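
/*
 * Illustrative sketch, hypothetical helper: sync_inodes_sb() is the
 * data-integrity counterpart of writeback_inodes_sb(); it submits the IO and
 * waits for it. A sync(2)-style caller would hold s_umount for read around
 * the call, e.g.:
 *
 *	static void example_sync_sb_data(struct super_block *sb)
 *	{
 *		down_read(&sb->s_umount);
 *		sync_inodes_sb(sb);
 *		up_read(&sb->s_umount);
 *	}
 */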
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) * write_inode_now - write an inode to disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) * @inode: inode to write to disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) * @sync: whether the write should be synchronous or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) * This function commits an inode to disk immediately if it is dirty. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) * primarily needed by knfsd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) * The caller must either have a ref on the inode or must have set I_WILL_FREE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) int write_inode_now(struct inode *inode, int sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) struct writeback_control wbc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) .nr_to_write = LONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) .sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) .range_start = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) .range_end = LLONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) if (!mapping_can_writeback(inode->i_mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) wbc.nr_to_write = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) return writeback_single_inode(inode, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) EXPORT_SYMBOL_NS(write_inode_now, ANDROID_GKI_VFS_EXPORT_ONLY);
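
/*
 * Illustrative sketch: a caller such as knfsd that needs one inode (its data
 * pages and the inode itself) written out right away, and that may sleep,
 * would use the synchronous form:
 *
 *	err = write_inode_now(inode, 1);
 *
 * where a non-zero @sync selects WB_SYNC_ALL, i.e. the call waits for the
 * writeout to complete.
 */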
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) * sync_inode - write an inode and its pages to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) * @inode: the inode to sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) * @wbc: controls the writeback mode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) * sync_inode() will write an inode and its pages to disk. It will also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) * correctly update the inode on its superblock's dirty inode lists and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) * update inode->i_state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) * The caller must have a ref on the inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) int sync_inode(struct inode *inode, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) return writeback_single_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) EXPORT_SYMBOL(sync_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) * sync_inode_metadata - write an inode to disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) * @inode: the inode to sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) * @wait: wait for I/O to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) * Write an inode to disk and adjust its dirty state after completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) * Note: only writes the actual inode, no associated data or other metadata.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) int sync_inode_metadata(struct inode *inode, int wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) struct writeback_control wbc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) .nr_to_write = 0, /* metadata-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) return sync_inode(inode, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) EXPORT_SYMBOL_NS(sync_inode_metadata, ANDROID_GKI_VFS_EXPORT_ONLY);
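
/*
 * Illustrative sketch, hypothetical helper: sync_inode_metadata() suits
 * fsync-style paths that have already written the data and only need the
 * inode itself pushed out, in the spirit of __generic_file_fsync():
 *
 *	static int example_fsync_metadata(struct inode *inode, int datasync)
 *	{
 *		if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
 *			return 0;
 *		return sync_inode_metadata(inode, 1);
 *	}
 */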