Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

fs/fs-writeback.c at commit 8f3ce5b39 (kx, 2023-10-28 12:00:06 +0300):

// SPDX-License-Identifier: GPL-2.0-only
/*
 * fs/fs-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains all the functions related to writing back and waiting
 * upon dirty inodes against superblocks, and writing back dirty
 * pages against inodes.  ie: data writeback.  Writeout of the
 * inode itself is not handled here.
 *
 * 10Apr2002	Andrew Morton
 *		Split out of fs/inode.c
 *		Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include <linux/device.h>
#include <linux/memcontrol.h>
#include "internal.h"

/*
 * 4MB minimal write chunk size
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_SHIFT - 10))
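
/*
 * Illustrative note (added for readability, not from the original source):
 * 4096UL here is a size in KiB and the shift by (PAGE_SHIFT - 10) converts
 * KiB to pages.  With the common 4 KiB page size (PAGE_SHIFT == 12) this is
 * 4096 >> 2 == 1024 pages == 4 MiB; with 64 KiB pages it would be
 * 4096 >> 6 == 64 pages, still 4 MiB.
 */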

/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	unsigned int for_sync:1;	/* sync(2) WB_SYNC_ALL writeback */
	unsigned int auto_free:1;	/* free on completion */
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct wb_completion *done;	/* set if the caller waits */
};

/*
 * If an inode is constantly having its pages dirtied, but then the
 * updates stop dirtytime_expire_interval seconds in the past, it's
 * possible for the worst case time between when an inode has its
 * timestamps updated and when they finally get written out to be two
 * dirtytime_expire_intervals.  We set the default to 12 hours (in
 * seconds), which means most of the time inodes will have their
 * timestamps written to disk after 12 hours, but in the worst case a
 * few inodes might not have their timestamps updated for 24 hours.
 */
unsigned int dirtytime_expire_interval = 12 * 60 * 60;

static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_io_list);
}

/*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure and inline functions so that the definition
 * remains local to this file.
 */
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

EXPORT_TRACEPOINT_SYMBOL_GPL(wbc_writepage);

static bool wb_io_lists_populated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb)) {
		return false;
	} else {
		set_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(!wb->avg_write_bandwidth);
		atomic_long_add(wb->avg_write_bandwidth,
				&wb->bdi->tot_write_bandwidth);
		return true;
	}
}

static void wb_io_lists_depopulated(struct bdi_writeback *wb)
{
	if (wb_has_dirty_io(wb) && list_empty(&wb->b_dirty) &&
	    list_empty(&wb->b_io) && list_empty(&wb->b_more_io)) {
		clear_bit(WB_has_dirty_io, &wb->state);
		WARN_ON_ONCE(atomic_long_sub_return(wb->avg_write_bandwidth,
					&wb->bdi->tot_write_bandwidth) < 0);
	}
}

/**
 * inode_io_list_move_locked - move an inode onto a bdi_writeback IO list
 * @inode: inode to be moved
 * @wb: target bdi_writeback
 * @head: one of @wb->b_{dirty|io|more_io|dirty_time}
 *
 * Move @inode->i_io_list to @head of @wb and set %WB_has_dirty_io.
 * Returns %true if @inode is the first occupant of the !dirty_time IO
 * lists; otherwise, %false.
 */
static bool inode_io_list_move_locked(struct inode *inode,
				      struct bdi_writeback *wb,
				      struct list_head *head)
{
	assert_spin_locked(&wb->list_lock);

	list_move(&inode->i_io_list, head);

	/* dirty_time doesn't count as dirty_io until expiration */
	if (head != &wb->b_dirty_time)
		return wb_io_lists_populated(wb);

	wb_io_lists_depopulated(wb);
	return false;
}

/**
 * inode_io_list_del_locked - remove an inode from its bdi_writeback IO list
 * @inode: inode to be removed
 * @wb: bdi_writeback @inode is being removed from
 *
 * Remove @inode which may be on one of @wb->b_{dirty|io|more_io} lists and
 * clear %WB_has_dirty_io if all are empty afterwards.
 */
static void inode_io_list_del_locked(struct inode *inode,
				     struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	assert_spin_locked(&inode->i_lock);

	inode->i_state &= ~I_SYNC_QUEUED;
	list_del_init(&inode->i_io_list);
	wb_io_lists_depopulated(wb);
}

static void wb_wakeup(struct bdi_writeback *wb)
{
	spin_lock_bh(&wb->work_lock);
	if (test_bit(WB_registered, &wb->state))
		mod_delayed_work(bdi_wq, &wb->dwork, 0);
	spin_unlock_bh(&wb->work_lock);
}

static void finish_writeback_work(struct bdi_writeback *wb,
				  struct wb_writeback_work *work)
{
	struct wb_completion *done = work->done;

	if (work->auto_free)
		kfree(work);
	if (done) {
		wait_queue_head_t *waitq = done->waitq;

		/* @done can't be accessed after the following dec */
		if (atomic_dec_and_test(&done->cnt))
			wake_up_all(waitq);
	}
}

static void wb_queue_work(struct bdi_writeback *wb,
			  struct wb_writeback_work *work)
{
	trace_writeback_queue(wb, work);

	if (work->done)
		atomic_inc(&work->done->cnt);

	spin_lock_bh(&wb->work_lock);

	if (test_bit(WB_registered, &wb->state)) {
		list_add_tail(&work->list, &wb->work_list);
		mod_delayed_work(bdi_wq, &wb->dwork, 0);
	} else
		finish_writeback_work(wb, work);

	spin_unlock_bh(&wb->work_lock);
}

/**
 * wb_wait_for_completion - wait for completion of bdi_writeback_works
 * @done: target wb_completion
 *
 * Wait for one or more work items issued to @bdi with their ->done field
 * set to @done, which should have been initialized with
 * DEFINE_WB_COMPLETION().  This function returns after all such work items
 * are completed.  Work items which are waited upon aren't freed
 * automatically on completion.
 */
void wb_wait_for_completion(struct wb_completion *done)
{
	atomic_dec(&done->cnt);		/* put down the initial count */
	wait_event(*done->waitq, !atomic_read(&done->cnt));
}
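
/*
 * Illustrative usage sketch (not taken from this file): a caller that
 * wants to block until its queued writeback work finishes would pair
 * the helpers above roughly as
 *
 *	DEFINE_WB_COMPLETION(done, bdi);
 *	struct wb_writeback_work work = {
 *		.sb		= sb,
 *		.sync_mode	= WB_SYNC_NONE,
 *		.done		= &done,
 *	};
 *
 *	wb_queue_work(wb, &work);
 *	wb_wait_for_completion(&done);
 *
 * The completion counts outstanding work items, so several works may
 * share one @done and the caller wakes only after the last of them has
 * gone through finish_writeback_work().
 */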

#ifdef CONFIG_CGROUP_WRITEBACK

/*
 * Parameters for foreign inode detection, see wbc_detach_inode() to see
 * how they're used.
 *
 * These parameters are inherently heuristic as the detection target
 * itself is fuzzy.  All we want to do is detach an inode from its
 * current owner if it's being written to by some other cgroups too much.
 *
 * The current cgroup writeback is built on the assumption that multiple
 * cgroups writing to the same inode concurrently is very rare and a mode
 * of operation which isn't well supported.  As such, the goal is not
 * taking too long when a different cgroup takes over an inode while
 * avoiding too aggressive flip-flops from occasional foreign writes.
 *
 * We record, very roughly, 2s worth of IO time history and if more than
 * half of that is foreign, trigger the switch.  The recording is quantized
 * to 16 slots.  To keep tiny writes from swinging the decision too much,
 * writes smaller than 1/8 of avg size are ignored.
 */
#define WB_FRN_TIME_SHIFT	13	/* 1s = 2^13, up to 8 secs w/ 16bit */
#define WB_FRN_TIME_AVG_SHIFT	3	/* avg = avg * 7/8 + new * 1/8 */
#define WB_FRN_TIME_CUT_DIV	8	/* ignore rounds < avg / 8 */
#define WB_FRN_TIME_PERIOD	(2 * (1 << WB_FRN_TIME_SHIFT))	/* 2s */

#define WB_FRN_HIST_SLOTS	16	/* inode->i_wb_frn_history is 16bit */
#define WB_FRN_HIST_UNIT	(WB_FRN_TIME_PERIOD / WB_FRN_HIST_SLOTS)
					/* each slot's duration is 2s / 16 */
#define WB_FRN_HIST_THR_SLOTS	(WB_FRN_HIST_SLOTS / 2)
					/* if foreign slots >= 8, switch */
#define WB_FRN_HIST_MAX_SLOTS	(WB_FRN_HIST_THR_SLOTS / 2 + 1)
					/* one round can affect up to 5 slots */
#define WB_FRN_MAX_IN_FLIGHT	1024	/* don't queue too many concurrently */
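
/*
 * Worked numbers for the knobs above (illustrative, derived from the
 * definitions rather than taken from the original source): with
 * WB_FRN_TIME_SHIFT == 13, one "second" is 2^13 == 8192 time units, so
 * WB_FRN_TIME_PERIOD == 16384 units (~2s) and WB_FRN_HIST_UNIT ==
 * 16384 / 16 == 1024 units (~125ms per history slot).  A switch needs
 * more than WB_FRN_HIST_THR_SLOTS == 8 of the 16 slots marked foreign,
 * and a single round shifts in at most WB_FRN_HIST_MAX_SLOTS == 5 slots.
 */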

static atomic_t isw_nr_in_flight = ATOMIC_INIT(0);
static struct workqueue_struct *isw_wq;

void __inode_attach_wb(struct inode *inode, struct page *page)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct bdi_writeback *wb = NULL;

	if (inode_cgwb_enabled(inode)) {
		struct cgroup_subsys_state *memcg_css;

		if (page) {
			memcg_css = mem_cgroup_css_from_page(page);
			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
		} else {
			/* must pin memcg_css, see wb_get_create() */
			memcg_css = task_get_css(current, memory_cgrp_id);
			wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
			css_put(memcg_css);
		}
	}

	if (!wb)
		wb = &bdi->wb;

	/*
	 * There may be multiple instances of this function racing to
	 * update the same inode.  Use cmpxchg() to tell the winner.
	 */
	if (unlikely(cmpxchg(&inode->i_wb, NULL, wb)))
		wb_put(wb);
}
EXPORT_SYMBOL_GPL(__inode_attach_wb);

/**
 * locked_inode_to_wb_and_lock_list - determine a locked inode's wb and lock it
 * @inode: inode of interest with i_lock held
 *
 * Returns @inode's wb with its list_lock held.  @inode->i_lock must be
 * held on entry and is released on return.  The returned wb is guaranteed
 * to stay @inode's associated wb until its list_lock is released.
 */
static struct bdi_writeback *
locked_inode_to_wb_and_lock_list(struct inode *inode)
	__releases(&inode->i_lock)
	__acquires(&wb->list_lock)
{
	while (true) {
		struct bdi_writeback *wb = inode_to_wb(inode);

		/*
		 * inode_to_wb() association is protected by both
		 * @inode->i_lock and @wb->list_lock but list_lock nests
		 * outside i_lock.  Drop i_lock and verify that the
		 * association hasn't changed after acquiring list_lock.
		 */
		wb_get(wb);
		spin_unlock(&inode->i_lock);
		spin_lock(&wb->list_lock);

		/* i_wb may have changed in between, can't use inode_to_wb() */
		if (likely(wb == inode->i_wb)) {
			wb_put(wb);	/* @inode already has ref */
			return wb;
		}

		spin_unlock(&wb->list_lock);
		wb_put(wb);
		cpu_relax();
		spin_lock(&inode->i_lock);
	}
}

/**
 * inode_to_wb_and_lock_list - determine an inode's wb and lock it
 * @inode: inode of interest
 *
 * Same as locked_inode_to_wb_and_lock_list() but @inode->i_lock isn't held
 * on entry.
 */
static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
	__acquires(&wb->list_lock)
{
	spin_lock(&inode->i_lock);
	return locked_inode_to_wb_and_lock_list(inode);
}

struct inode_switch_wbs_context {
	struct inode		*inode;
	struct bdi_writeback	*new_wb;

	struct rcu_head		rcu_head;
	struct work_struct	work;
};

static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi)
{
	down_write(&bdi->wb_switch_rwsem);
}

static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi)
{
	up_write(&bdi->wb_switch_rwsem);
}

static void inode_switch_wbs_work_fn(struct work_struct *work)
{
	struct inode_switch_wbs_context *isw =
		container_of(work, struct inode_switch_wbs_context, work);
	struct inode *inode = isw->inode;
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct address_space *mapping = inode->i_mapping;
	struct bdi_writeback *old_wb = inode->i_wb;
	struct bdi_writeback *new_wb = isw->new_wb;
	XA_STATE(xas, &mapping->i_pages, 0);
	struct page *page;
	bool switched = false;

	/*
	 * If @inode switches cgwb membership while sync_inodes_sb() is
	 * being issued, sync_inodes_sb() might miss it.  Synchronize.
	 */
	down_read(&bdi->wb_switch_rwsem);

	/*
	 * By the time control reaches here, RCU grace period has passed
	 * since I_WB_SWITCH assertion and all wb stat update transactions
	 * between unlocked_inode_to_wb_begin/end() are guaranteed to be
	 * synchronizing against the i_pages lock.
	 *
	 * Grabbing old_wb->list_lock, inode->i_lock and the i_pages lock
	 * gives us exclusion against all wb related operations on @inode
	 * including IO list manipulations and stat updates.
	 */
	if (old_wb < new_wb) {
		spin_lock(&old_wb->list_lock);
		spin_lock_nested(&new_wb->list_lock, SINGLE_DEPTH_NESTING);
	} else {
		spin_lock(&new_wb->list_lock);
		spin_lock_nested(&old_wb->list_lock, SINGLE_DEPTH_NESTING);
	}
	spin_lock(&inode->i_lock);
	xa_lock_irq(&mapping->i_pages);

	/*
	 * Once I_FREEING is visible under i_lock, the eviction path owns
	 * the inode and we shouldn't modify ->i_io_list.
	 */
	if (unlikely(inode->i_state & I_FREEING))
		goto skip_switch;

	trace_inode_switch_wbs(inode, old_wb, new_wb);

	/*
	 * Count and transfer stats.  Note that PAGECACHE_TAG_DIRTY points
	 * to possibly dirty pages while PAGECACHE_TAG_WRITEBACK points to
	 * pages actually under writeback.
	 */
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_DIRTY) {
		if (PageDirty(page)) {
			dec_wb_stat(old_wb, WB_RECLAIMABLE);
			inc_wb_stat(new_wb, WB_RECLAIMABLE);
		}
	}

	xas_set(&xas, 0);
	xas_for_each_marked(&xas, page, ULONG_MAX, PAGECACHE_TAG_WRITEBACK) {
		WARN_ON_ONCE(!PageWriteback(page));
		dec_wb_stat(old_wb, WB_WRITEBACK);
		inc_wb_stat(new_wb, WB_WRITEBACK);
	}

	wb_get(new_wb);

	/*
	 * Transfer to @new_wb's IO list if necessary.  The specific list
	 * @inode was on is ignored and the inode is put on ->b_dirty which
	 * is always correct including from ->b_dirty_time.  The transfer
	 * preserves @inode->dirtied_when ordering.
	 */
	if (!list_empty(&inode->i_io_list)) {
		struct inode *pos;

		inode_io_list_del_locked(inode, old_wb);
		inode->i_wb = new_wb;
		list_for_each_entry(pos, &new_wb->b_dirty, i_io_list)
			if (time_after_eq(inode->dirtied_when,
					  pos->dirtied_when))
				break;
		inode_io_list_move_locked(inode, new_wb, pos->i_io_list.prev);
	} else {
		inode->i_wb = new_wb;
	}

	/* ->i_wb_frn updates may race wbc_detach_inode() but doesn't matter */
	inode->i_wb_frn_winner = 0;
	inode->i_wb_frn_avg_time = 0;
	inode->i_wb_frn_history = 0;
	switched = true;
skip_switch:
	/*
	 * Paired with load_acquire in unlocked_inode_to_wb_begin() and
	 * ensures that the new wb is visible if they see !I_WB_SWITCH.
	 */
	smp_store_release(&inode->i_state, inode->i_state & ~I_WB_SWITCH);

	xa_unlock_irq(&mapping->i_pages);
	spin_unlock(&inode->i_lock);
	spin_unlock(&new_wb->list_lock);
	spin_unlock(&old_wb->list_lock);

	up_read(&bdi->wb_switch_rwsem);

	if (switched) {
		wb_wakeup(new_wb);
		wb_put(old_wb);
	}
	wb_put(new_wb);

	iput(inode);
	kfree(isw);

	atomic_dec(&isw_nr_in_flight);
}

static void inode_switch_wbs_rcu_fn(struct rcu_head *rcu_head)
{
	struct inode_switch_wbs_context *isw = container_of(rcu_head,
				struct inode_switch_wbs_context, rcu_head);

	/* needs to grab bh-unsafe locks, bounce to work item */
	INIT_WORK(&isw->work, inode_switch_wbs_work_fn);
	queue_work(isw_wq, &isw->work);
}

/**
 * inode_switch_wbs - change the wb association of an inode
 * @inode: target inode
 * @new_wb_id: ID of the new wb
 *
 * Switch @inode's wb association to the wb identified by @new_wb_id.  The
 * switching is performed asynchronously and may fail silently.
 */
static void inode_switch_wbs(struct inode *inode, int new_wb_id)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);
	struct cgroup_subsys_state *memcg_css;
	struct inode_switch_wbs_context *isw;

	/* noop if it seems to be already in progress */
	if (inode->i_state & I_WB_SWITCH)
		return;

	/* avoid queueing a new switch if too many are already in flight */
	if (atomic_read(&isw_nr_in_flight) > WB_FRN_MAX_IN_FLIGHT)
		return;

	isw = kzalloc(sizeof(*isw), GFP_ATOMIC);
	if (!isw)
		return;

	atomic_inc(&isw_nr_in_flight);

	/* find and pin the new wb */
	rcu_read_lock();
	memcg_css = css_from_id(new_wb_id, &memory_cgrp_subsys);
	if (memcg_css && !css_tryget(memcg_css))
		memcg_css = NULL;
	rcu_read_unlock();
	if (!memcg_css)
		goto out_free;

	isw->new_wb = wb_get_create(bdi, memcg_css, GFP_ATOMIC);
	css_put(memcg_css);
	if (!isw->new_wb)
		goto out_free;

	/* while holding I_WB_SWITCH, no one else can update the association */
	spin_lock(&inode->i_lock);
	if (!(inode->i_sb->s_flags & SB_ACTIVE) ||
	    inode->i_state & (I_WB_SWITCH | I_FREEING) ||
	    inode_to_wb(inode) == isw->new_wb) {
		spin_unlock(&inode->i_lock);
		goto out_free;
	}
	inode->i_state |= I_WB_SWITCH;
	__iget(inode);
	spin_unlock(&inode->i_lock);

	isw->inode = inode;

	/*
	 * In addition to synchronizing among switchers, I_WB_SWITCH tells
	 * the RCU protected stat update paths to grab the i_pages
	 * lock so that stat transfer can synchronize against them.
	 * Let's continue after I_WB_SWITCH is guaranteed to be visible.
	 */
	call_rcu(&isw->rcu_head, inode_switch_wbs_rcu_fn);
	return;

out_free:
	atomic_dec(&isw_nr_in_flight);
	if (isw->new_wb)
		wb_put(isw->new_wb);
	kfree(isw);
}
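
/*
 * Summary note (added for readability, not part of the original source):
 * the switch is a three step handshake.  inode_switch_wbs() pins the new
 * wb, marks the inode with I_WB_SWITCH and takes an inode reference;
 * inode_switch_wbs_rcu_fn() runs only after an RCU grace period, so stat
 * updaters using unlocked_inode_to_wb_begin() have seen the flag; and
 * inode_switch_wbs_work_fn() then transfers the per-wb page stats and the
 * ->i_io_list position under the list, inode and i_pages locks before
 * clearing I_WB_SWITCH.
 */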

/**
 * wbc_attach_and_unlock_inode - associate wbc with target inode and unlock it
 * @wbc: writeback_control of interest
 * @inode: target inode
 *
 * @inode is locked and about to be written back under the control of @wbc.
 * Record @inode's writeback context into @wbc and unlock the i_lock.  On
 * writeback completion, wbc_detach_inode() should be called.  This is used
 * to track the cgroup writeback context.
 */
void wbc_attach_and_unlock_inode(struct writeback_control *wbc,
				 struct inode *inode)
{
	if (!inode_cgwb_enabled(inode)) {
		spin_unlock(&inode->i_lock);
		return;
	}

	wbc->wb = inode_to_wb(inode);
	wbc->inode = inode;

	wbc->wb_id = wbc->wb->memcg_css->id;
	wbc->wb_lcand_id = inode->i_wb_frn_winner;
	wbc->wb_tcand_id = 0;
	wbc->wb_bytes = 0;
	wbc->wb_lcand_bytes = 0;
	wbc->wb_tcand_bytes = 0;

	wb_get(wbc->wb);
	spin_unlock(&inode->i_lock);

	/*
	 * A dying wb indicates that either the blkcg associated with the
	 * memcg changed or the associated memcg is dying.  In the first
	 * case, a replacement wb should already be available and we should
	 * refresh the wb immediately.  In the second case, trying to
	 * refresh will keep failing.
	 */
	if (unlikely(wb_dying(wbc->wb) && !css_is_dying(wbc->wb->memcg_css)))
		inode_switch_wbs(inode, wbc->wb_id);
}
EXPORT_SYMBOL_GPL(wbc_attach_and_unlock_inode);
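
/*
 * Illustrative pairing sketch (not taken from this file): a writeback
 * path is expected to bracket the actual page writeout roughly as
 *
 *	spin_lock(&inode->i_lock);
 *	wbc_attach_and_unlock_inode(&wbc, inode);
 *	... write the pages, calling wbc_account_cgroup_owner() per page ...
 *	wbc_detach_inode(&wbc);
 *
 * so that the per-round byte counters initialised above get populated and
 * are then folded into the inode's foreign-writeback history on detach.
 */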

/**
 * wbc_detach_inode - disassociate wbc from inode and perform foreign detection
 * @wbc: writeback_control of the just finished writeback
 *
 * To be called after a writeback attempt of an inode finishes and undoes
 * wbc_attach_and_unlock_inode().  Can be called under any context.
 *
 * As concurrent write sharing of an inode is expected to be very rare and
 * memcg only tracks page ownership on a first-use basis severely confining
 * the usefulness of such sharing, cgroup writeback tracks ownership
 * per-inode.  While the support for concurrent write sharing of an inode
 * is deemed unnecessary, an inode being written to by different cgroups at
 * different points in time is a lot more common, and, more importantly,
 * charging only by first-use can too readily lead to grossly incorrect
 * behaviors (a single foreign page can lead to gigabytes of writeback
 * being incorrectly attributed).
 *
 * To resolve this issue, cgroup writeback detects the majority dirtier of
 * an inode and transfers the ownership to it.  To avoid unnecessary
 * oscillation, the detection mechanism keeps track of history and gives
 * out the switch verdict only if the foreign usage pattern is stable over
 * a certain amount of time and/or writeback attempts.
 *
 * On each writeback attempt, @wbc tries to detect the majority writer
 * using the Boyer-Moore majority vote algorithm.  In addition to the byte
 * count from the majority voting, it also counts the bytes written for the
 * current wb and the last round's winner wb (max of last round's current
 * wb, the winner from two rounds ago, and the last round's majority
 * candidate).  Keeping track of the historical winner helps the algorithm
 * to semi-reliably detect the most active writer even when it's not the
 * absolute majority.
 *
 * Once the winner of the round is determined, whether the winner is
 * foreign or not and how much IO time the round consumed is recorded in
 * inode->i_wb_frn_history.  If the amount of recorded foreign IO time is
 * over a certain threshold, the switch verdict is given.
 */
void wbc_detach_inode(struct writeback_control *wbc)
{
	struct bdi_writeback *wb = wbc->wb;
	struct inode *inode = wbc->inode;
	unsigned long avg_time, max_bytes, max_time;
	u16 history;
	int max_id;

	if (!wb)
		return;

	history = inode->i_wb_frn_history;
	avg_time = inode->i_wb_frn_avg_time;

	/* pick the winner of this round */
	if (wbc->wb_bytes >= wbc->wb_lcand_bytes &&
	    wbc->wb_bytes >= wbc->wb_tcand_bytes) {
		max_id = wbc->wb_id;
		max_bytes = wbc->wb_bytes;
	} else if (wbc->wb_lcand_bytes >= wbc->wb_tcand_bytes) {
		max_id = wbc->wb_lcand_id;
		max_bytes = wbc->wb_lcand_bytes;
	} else {
		max_id = wbc->wb_tcand_id;
		max_bytes = wbc->wb_tcand_bytes;
	}

	/*
	 * Calculate the amount of IO time the winner consumed and fold it
	 * into the running average kept per inode.  If the consumed IO
	 * time is lower than avg / WB_FRN_TIME_CUT_DIV, ignore it for
	 * deciding whether to switch or not.  This is to prevent one-off
	 * small dirtiers from skewing the verdict.
	 */
	max_time = DIV_ROUND_UP((max_bytes >> PAGE_SHIFT) << WB_FRN_TIME_SHIFT,
				wb->avg_write_bandwidth);
	if (avg_time)
		avg_time += (max_time >> WB_FRN_TIME_AVG_SHIFT) -
			    (avg_time >> WB_FRN_TIME_AVG_SHIFT);
	else
		avg_time = max_time;	/* immediate catch up on first run */

	if (max_time >= avg_time / WB_FRN_TIME_CUT_DIV) {
		int slots;

		/*
		 * The switch verdict is reached if foreign wb's consume
		 * more than a certain proportion of IO time in a
		 * WB_FRN_TIME_PERIOD.  This is loosely tracked by 16 slot
		 * history mask where each bit represents one sixteenth of
		 * the period.  Determine the number of slots to shift into
		 * history from @max_time.
		 */
		slots = min(DIV_ROUND_UP(max_time, WB_FRN_HIST_UNIT),
			    (unsigned long)WB_FRN_HIST_MAX_SLOTS);
		history <<= slots;
		if (wbc->wb_id != max_id)
			history |= (1U << slots) - 1;

		if (history)
			trace_inode_foreign_history(inode, wbc, history);

		/*
		 * Switch if the current wb isn't the consistent winner.
		 * If there are multiple closely competing dirtiers, the
		 * inode may switch across them repeatedly over time, which
		 * is okay.  The main goal is avoiding keeping an inode on
		 * the wrong wb for an extended period of time.
		 */
		if (hweight32(history) > WB_FRN_HIST_THR_SLOTS)
			inode_switch_wbs(inode, max_id);
	}

	/*
	 * Multiple instances of this function may race to update the
	 * following fields but we don't mind occasional inaccuracies.
	 */
	inode->i_wb_frn_winner = max_id;
	inode->i_wb_frn_avg_time = min(avg_time, (unsigned long)U16_MAX);
	inode->i_wb_frn_history = history;

	wb_put(wbc->wb);
	wbc->wb = NULL;
}
EXPORT_SYMBOL_GPL(wbc_detach_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721)  * wbc_account_cgroup_owner - account writeback to update inode cgroup ownership
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722)  * @wbc: writeback_control of the writeback in progress
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723)  * @page: page being written out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724)  * @bytes: number of bytes being written out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726)  * @bytes from @page are about to be written out during the writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727)  * controlled by @wbc.  Keep the book for foreign inode detection.  See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728)  * wbc_detach_inode().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 			      size_t bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	struct cgroup_subsys_state *css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 	int id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 	 * pageout() path doesn't attach @wbc to the inode being written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 	 * out.  This is intentional as we don't want the function to block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	 * behind a slow cgroup.  Ultimately, we want pageout() to kick off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	 * regular writeback instead of writing things out itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 	if (!wbc->wb || wbc->no_cgroup_owner)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	css = mem_cgroup_css_from_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	/* dead cgroups shouldn't contribute to inode ownership arbitration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	if (!(css->flags & CSS_ONLINE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	id = css->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 	if (id == wbc->wb_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 		wbc->wb_bytes += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	if (id == wbc->wb_lcand_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 		wbc->wb_lcand_bytes += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	/* Boyer-Moore majority vote algorithm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	if (!wbc->wb_tcand_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 		wbc->wb_tcand_id = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	if (id == wbc->wb_tcand_id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		wbc->wb_tcand_bytes += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 		wbc->wb_tcand_bytes -= min(bytes, wbc->wb_tcand_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) EXPORT_SYMBOL_GPL(wbc_account_cgroup_owner);
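/*
 * For reference, the core of the byte-weighted Boyer-Moore vote used in
 * wbc_account_cgroup_owner() above, detached from writeback_control -- a
 * minimal sketch; struct and function names are illustrative, not kernel
 * API.
 */
#if 0	/* illustrative sketch only */
struct bm_vote {
	int	cand_id;	/* current majority candidate */
	size_t	cand_bytes;	/* unmatched weight behind it */
};

static void bm_account(struct bm_vote *v, int id, size_t bytes)
{
	if (!v->cand_bytes)	/* no standing candidate: adopt this id */
		v->cand_id = id;
	if (id == v->cand_id)	/* supporter: stack its weight */
		v->cand_bytes += bytes;
	else			/* challenger: cancel matching weight */
		v->cand_bytes -= bytes < v->cand_bytes ? bytes : v->cand_bytes;
}
#endif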
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771)  * inode_congested - test whether an inode is congested
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772)  * @inode: inode to test for congestion (may be NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773)  * @cong_bits: mask of WB_[a]sync_congested bits to test
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775)  * Tests whether @inode is congested.  @cong_bits is the mask of congestion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776)  * bits to test and the return value is the mask of set bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778)  * If cgroup writeback is enabled for @inode, the congestion state is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779)  * determined by whether the cgwb (cgroup bdi_writeback) for the blkcg
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780)  * associated with @inode is congested; otherwise, the root wb's congestion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781)  * state is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783)  * @inode is allowed to be NULL as this function is often called on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784)  * mapping->host which is NULL for the swapper space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) int inode_congested(struct inode *inode, int cong_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	 * Once set, ->i_wb never becomes NULL while the inode is alive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 	 * Start transaction iff ->i_wb is visible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	if (inode && inode_to_wb_is_valid(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 		struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 		struct wb_lock_cookie lock_cookie = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 		bool congested;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 		wb = unlocked_inode_to_wb_begin(inode, &lock_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 		congested = wb_congested(wb, cong_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 		unlocked_inode_to_wb_end(inode, &lock_cookie);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 		return congested;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	return wb_congested(&inode_to_bdi(inode)->wb, cong_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) EXPORT_SYMBOL_GPL(inode_congested);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808)  * wb_split_bdi_pages - split nr_pages to write according to bandwidth
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809)  * @wb: target bdi_writeback to split @nr_pages to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810)  * @nr_pages: number of pages to write for the whole bdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812)  * Split @wb's portion of @nr_pages according to @wb's write bandwidth in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813)  * relation to the total write bandwidth of all wb's w/ dirty inodes on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814)  * @wb->bdi.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	unsigned long this_bw = wb->avg_write_bandwidth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	if (nr_pages == LONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		return LONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	 * This may be called on clean wb's, where proportional distribution
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	 * may not make sense; just use the original @nr_pages in those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	 * cases.  In general, we want to err on the side of writing more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	if (!tot_bw || this_bw >= tot_bw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 		return DIV_ROUND_UP_ULL((u64)nr_pages * this_bw, tot_bw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) }
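/*
 * Worked example for wb_split_bdi_pages() above, with illustrative
 * numbers: nr_pages = 1024, this_bw = 25 and tot_bw = 100 (same units)
 * yields DIV_ROUND_UP(1024 * 25, 100) = 256 pages for this wb, while a
 * clean bdi (tot_bw == 0) or a wb owning all of the bandwidth keeps the
 * full 1024.
 */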
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836)  * bdi_split_work_to_wbs - split a wb_writeback_work to all wb's of a bdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837)  * @bdi: target backing_dev_info
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838)  * @base_work: wb_writeback_work to issue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839)  * @skip_if_busy: skip wb's which already have writeback in progress
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841)  * Split and issue @base_work to all wb's (bdi_writeback's) of @bdi which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842)  * have dirty inodes.  If @base_work->nr_pages isn't %LONG_MAX, it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843)  * distributed to the busy wbs according to each wb's proportion in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844)  * total active write bandwidth of @bdi.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 				  struct wb_writeback_work *base_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 				  bool skip_if_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	struct bdi_writeback *last_wb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	struct bdi_writeback *wb = list_entry(&bdi->wb_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 					      struct bdi_writeback, bdi_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	list_for_each_entry_continue_rcu(wb, &bdi->wb_list, bdi_node) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		DEFINE_WB_COMPLETION(fallback_work_done, bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 		struct wb_writeback_work fallback_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 		struct wb_writeback_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 		long nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		if (last_wb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 			wb_put(last_wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 			last_wb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 		/* SYNC_ALL writes out I_DIRTY_TIME too */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 		if (!wb_has_dirty_io(wb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 		    (base_work->sync_mode == WB_SYNC_NONE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		     list_empty(&wb->b_dirty_time)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		if (skip_if_busy && writeback_in_progress(wb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 		nr_pages = wb_split_bdi_pages(wb, base_work->nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 		work = kmalloc(sizeof(*work), GFP_ATOMIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		if (work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 			*work = *base_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 			work->nr_pages = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 			work->auto_free = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 			wb_queue_work(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 		/* alloc failed, execute synchronously using on-stack fallback */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 		work = &fallback_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 		*work = *base_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 		work->nr_pages = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		work->auto_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 		work->done = &fallback_work_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		wb_queue_work(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		 * Pin @wb so that it stays on @bdi->wb_list.  This allows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 		 * continuing iteration from @wb after dropping and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 		 * regrabbing rcu read lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 		wb_get(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 		last_wb = wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		wb_wait_for_completion(&fallback_work_done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 		goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 	if (last_wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 		wb_put(last_wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915)  * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916)  * @bdi_id: target bdi id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917)  * @memcg_id: target memcg css id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918)  * @nr: number of pages to write, 0 for best-effort dirty flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919)  * @reason: reason why some writeback work was initiated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920)  * @done: target wb_completion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922)  * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923)  * with the specified parameters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 			   enum wb_reason reason, struct wb_completion *done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	struct backing_dev_info *bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	struct cgroup_subsys_state *memcg_css;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	struct wb_writeback_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	/* lookup bdi and memcg */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	bdi = bdi_get_by_id(bdi_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	if (!bdi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 		return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	memcg_css = css_from_id(memcg_id, &memory_cgrp_subsys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	if (memcg_css && !css_tryget(memcg_css))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 		memcg_css = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	if (!memcg_css) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 		goto out_bdi_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	 * And find the associated wb.  If the wb isn't there already,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 	 * there's nothing to flush; don't create one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	wb = wb_get_lookup(bdi, memcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	if (!wb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 		goto out_css_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	 * If @nr is zero, the caller is attempting to write out most of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 	 * the currently dirty pages.  Let's take the current dirty page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	 * count and inflate it by 25% which should be large enough to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	 * flush out most dirty pages while avoiding getting livelocked by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	 * concurrent dirtiers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	if (!nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 		unsigned long filepages, headroom, dirty, writeback;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 		mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 				      &writeback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 		nr = dirty * 10 / 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	/* issue the writeback work */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 	work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	if (work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 		work->nr_pages = nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 		work->sync_mode = WB_SYNC_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 		work->range_cyclic = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 		work->reason = reason;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 		work->done = done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 		work->auto_free = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 		wb_queue_work(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	wb_put(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) out_css_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	css_put(memcg_css);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) out_bdi_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	bdi_put(bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) }
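/*
 * Example of the best-effort sizing above, with illustrative numbers:
 * for @nr == 0 and 4096 currently dirty pages, nr becomes
 * 4096 * 10 / 8 = 5120, i.e. the dirty count inflated by 25% so that
 * concurrent dirtiers are unlikely to livelock the flush.
 */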
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998)  * cgroup_writeback_umount - flush inode wb switches for umount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)  * This function is called when a super_block is about to be destroyed and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001)  * flushes in-flight inode wb switches.  An inode wb switch goes through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)  * RCU and then workqueue, so the two need to be flushed in order to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003)  * that all previously scheduled switches are finished.  As wb switches are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004)  * rare occurrences and synchronize_rcu() can take a while, perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)  * flushing iff wb switches are in flight.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) void cgroup_writeback_umount(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	if (atomic_read(&isw_nr_in_flight)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 		 * Use rcu_barrier() to wait for all pending callbacks to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 		 * ensure that all in-flight wb switches are in the workqueue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 		rcu_barrier();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 		flush_workqueue(isw_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) static int __init cgroup_writeback_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	isw_wq = alloc_workqueue("inode_switch_wbs", 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	if (!isw_wq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) fs_initcall(cgroup_writeback_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) #else	/* CONFIG_CGROUP_WRITEBACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) static void bdi_down_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) static void bdi_up_write_wb_switch_rwsem(struct backing_dev_info *bdi) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) static struct bdi_writeback *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) locked_inode_to_wb_and_lock_list(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	__releases(&inode->i_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	__acquires(&wb->list_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	struct bdi_writeback *wb = inode_to_wb(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	return wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) static struct bdi_writeback *inode_to_wb_and_lock_list(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	__acquires(&wb->list_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	struct bdi_writeback *wb = inode_to_wb(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	return wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) static long wb_split_bdi_pages(struct bdi_writeback *wb, long nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) static void bdi_split_work_to_wbs(struct backing_dev_info *bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 				  struct wb_writeback_work *base_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 				  bool skip_if_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	if (!skip_if_busy || !writeback_in_progress(&bdi->wb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		base_work->auto_free = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 		wb_queue_work(&bdi->wb, base_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) #endif	/* CONFIG_CGROUP_WRITEBACK */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)  * Add in the number of potentially dirty inodes, because each inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075)  * write can dirty pagecache in the underlying blockdev.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) static unsigned long get_nr_dirty_pages(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	return global_node_page_state(NR_FILE_DIRTY) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		get_nr_dirty_inodes();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) static void wb_start_writeback(struct bdi_writeback *wb, enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 	if (!wb_has_dirty_io(wb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	 * All callers of this function want to start writeback of all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	 * dirty pages. Places like vmscan can call this at a very
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	 * high frequency, causing pointless allocations of tons of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	 * work items and keeping the flusher threads busy retrieving
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	 * that work. Ensure that we only allow one of them pending and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	 * in flight at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	 */
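	/*
	 * The plain test_bit() below is a cheap read-only check; it skips
	 * the cacheline-dirtying atomic test_and_set_bit() when a
	 * start-all request is already pending.
	 */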
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	if (test_bit(WB_start_all, &wb->state) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	    test_and_set_bit(WB_start_all, &wb->state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	wb->start_all_reason = reason;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	wb_wakeup(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)  * wb_start_background_writeback - start background writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)  * @wb: bdi_writback to write from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108)  * Description:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)  *   This makes sure WB_SYNC_NONE background writeback happens. When
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)  *   this function returns, it is only guaranteed that for given wb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)  *   some IO is happening if we are over background dirty threshold.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)  *   Caller need not hold sb s_umount semaphore.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) void wb_start_background_writeback(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	 * We just wake up the flusher thread. It will perform background
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	 * writeback as soon as there is no other work to do.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	trace_writeback_wake_background(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	wb_wakeup(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125)  * Remove the inode from the writeback list it is on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) void inode_io_list_del(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	wb = inode_to_wb_and_lock_list(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 	spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	inode_io_list_del_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) EXPORT_SYMBOL(inode_io_list_del);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140)  * mark an inode as under writeback on the sb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) void sb_mark_inode_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 
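	/*
	 * The unlocked list_empty() below is only a fast-path hint; the
	 * check is repeated under s_inode_wblist_lock before the list is
	 * actually modified.
	 */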
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	if (list_empty(&inode->i_wb_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 		spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 		if (list_empty(&inode->i_wb_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 			list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 			trace_sb_mark_inode_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)  * clear an inode as under writeback on the sb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) void sb_clear_inode_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	if (!list_empty(&inode->i_wb_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 		spin_lock_irqsave(&sb->s_inode_wblist_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 		if (!list_empty(&inode->i_wb_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 			list_del_init(&inode->i_wb_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 			trace_sb_clear_inode_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 		spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176)  * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)  * furthest end of its superblock's dirty-inode list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)  * Before stamping the inode's ->dirtied_when, we check to see whether it is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)  * already the most-recently-dirtied inode on the b_dirty list.  If that is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)  * the case then the inode must have been redirtied while it was being written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182)  * out and we don't reset its dirtied_when.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) static void redirty_tail_locked(struct inode *inode, struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	assert_spin_locked(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	if (!list_empty(&wb->b_dirty)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 		struct inode *tail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		tail = wb_inode(wb->b_dirty.next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 		if (time_before(inode->dirtied_when, tail->dirtied_when))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 			inode->dirtied_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	inode_io_list_move_locked(inode, wb, &wb->b_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	inode->i_state &= ~I_SYNC_QUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207)  * requeue inode for re-scanning after bdi->b_io list is exhausted.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	inode_io_list_move_locked(inode, wb, &wb->b_more_io);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) static void inode_sync_complete(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	inode->i_state &= ~I_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	/* If inode is clean and unused, put it into LRU now... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	inode_add_lru(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	/* Waiters must see I_SYNC cleared before being woken up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	wake_up_bit(&inode->i_state, __I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) static bool inode_dirtied_after(struct inode *inode, unsigned long t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 	bool ret = time_after(inode->dirtied_when, t);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) #ifndef CONFIG_64BIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 	 * For inodes being constantly redirtied, dirtied_when can get stuck.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	 * It _appears_ to be in the future, but is actually in the distant past.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	 * This test is necessary to prevent such wrapped-around relative times
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	 * from permanently stopping the whole bdi writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) }
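/*
 * Concretely, on 32-bit with HZ=1000 a dirtied_when stamp older than
 * about 2^31 jiffies (~24.9 days) already compares as "after" the
 * current jiffies, which is why the extra time_before_eq() check above
 * is needed -- the numbers here only illustrate the wrap.
 */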
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) #define EXPIRE_DIRTY_ATIME 0x0001
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242)  * Move expired (dirtied before dirtied_before) dirty inodes from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)  * @delaying_queue to @dispatch_queue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static int move_expired_inodes(struct list_head *delaying_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 			       struct list_head *dispatch_queue,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 			       unsigned long dirtied_before)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	LIST_HEAD(tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	struct list_head *pos, *node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	struct super_block *sb = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	int do_sb_sort = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	int moved = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	while (!list_empty(delaying_queue)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 		inode = wb_inode(delaying_queue->prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 		if (inode_dirtied_after(inode, dirtied_before))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		list_move(&inode->i_io_list, &tmp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 		moved++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 		spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 		inode->i_state |= I_SYNC_QUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 		spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		if (sb_is_blkdev_sb(inode->i_sb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 		if (sb && sb != inode->i_sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 			do_sb_sort = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	/* just one sb in list, splice to dispatch_queue and we're done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	if (!do_sb_sort) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		list_splice(&tmp, dispatch_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	/* Move inodes from one superblock together */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	while (!list_empty(&tmp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 		sb = wb_inode(tmp.prev)->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 		list_for_each_prev_safe(pos, node, &tmp) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 			inode = wb_inode(pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 			if (inode->i_sb == sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 				list_move(&inode->i_io_list, dispatch_queue);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	return moved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)  * Queue all expired dirty inodes for io, eldest first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)  * Before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)  *         newly dirtied     b_dirty    b_io    b_more_io
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295)  *         =============>    gf         edc     BA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)  * After
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297)  *         newly dirtied     b_dirty    b_io    b_more_io
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298)  *         =============>    g          fBAedc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)  *                                           |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)  *                                           +--> dequeue for IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 		     unsigned long dirtied_before)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	int moved;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	unsigned long time_expire_jif = dirtied_before;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	assert_spin_locked(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	list_splice_init(&wb->b_more_io, &wb->b_io);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, dirtied_before);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	if (!work->for_sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 		time_expire_jif = jiffies - dirtytime_expire_interval * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 				     time_expire_jif);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	if (moved)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 		wb_io_lists_populated(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	trace_writeback_queue_io(wb, work, dirtied_before, moved);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) }
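/*
 * Note on the two cutoffs used in queue_io() above: b_dirty entries
 * expire against the caller's @dirtied_before, while b_dirty_time
 * entries (lazytime timestamps) normally expire only after
 * dirtytime_expire_interval (12 hours by default); for_sync work keeps
 * the caller's cutoff so sync(2) picks them up immediately.
 */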
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) static int write_inode(struct inode *inode, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 		trace_writeback_write_inode_start(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 		ret = inode->i_sb->s_op->write_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 		trace_writeback_write_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334)  * Wait for writeback on an inode to complete. Called with i_lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)  * Caller must make sure inode cannot go away when we drop i_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) static void __inode_wait_for_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	__releases(inode->i_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 	__acquires(inode->i_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 	wait_queue_head_t *wqh;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	while (inode->i_state & I_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 		spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 		__wait_on_bit(wqh, &wq, bit_wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 			      TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 		spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)  * Wait for writeback on an inode to complete. Caller must have inode pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) void inode_wait_for_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	__inode_wait_for_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 	spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364)  * Sleep until I_SYNC is cleared. This function must be called with i_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)  * held and drops it. It is aimed at callers not holding any inode reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)  * so once i_lock is dropped, inode can go away.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) static void inode_sleep_on_writeback(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	__releases(inode->i_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	int sleep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 	sleep = inode->i_state & I_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 	spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	if (sleep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 		schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	finish_wait(wqh, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384)  * Find proper writeback list for the inode depending on its current state and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385)  * possibly also change of its state while we were doing writeback.  Here we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)  * handle things such as livelock prevention or fairness of writeback among
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387)  * inodes. This function can be called only by the flusher thread - no one else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)  * processes all inodes in the writeback lists and requeueing inodes behind the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389)  * flusher thread's back can have unexpected consequences.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 			  struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 	if (inode->i_state & I_FREEING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 	 * Sync livelock prevention. Each inode is tagged and synced in one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	 * shot. If still dirty, it will be redirty_tail()'ed below.  Update
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	 * the dirty time to prevent enqueue and sync it again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	if ((inode->i_state & I_DIRTY) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 		inode->dirtied_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	if (wbc->pages_skipped) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 		 * writeback is not making progress due to locked
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 		 * buffers. Skip this inode for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 		redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 		 * We didn't write back all the pages.  nfs_writepages()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 		 * sometimes bails out without doing anything.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 		if (wbc->nr_to_write <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 			/* Slice used up. Queue for next turn. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 			requeue_io(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 			 * Writeback blocked by something other than
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 			 * congestion. Delay the inode for some time to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 			 * avoid spinning on the CPU (100% iowait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 			 * retrying writeback of the dirty page/inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 			 * that cannot be performed immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 			redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 	} else if (inode->i_state & I_DIRTY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 		 * Filesystems can dirty the inode during writeback operations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 		 * such as delayed allocation during submission or metadata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 		 * updates after data IO completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 		redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 	} else if (inode->i_state & I_DIRTY_TIME) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 		inode->dirtied_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 		inode_io_list_move_locked(inode, wb, &wb->b_dirty_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 		inode->i_state &= ~I_SYNC_QUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 		/* The inode is clean. Remove from writeback lists. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 		inode_io_list_del_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451)  * Write out an inode and its dirty pages. Do not update the writeback list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)  * linkage. That is left to the caller. The caller is also responsible for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453)  * setting I_SYNC flag and calling inode_sync_complete() to clear it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) static int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 	struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	long nr_to_write = wbc->nr_to_write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 	unsigned dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	WARN_ON(!(inode->i_state & I_SYNC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 	trace_writeback_single_inode_start(inode, wbc, nr_to_write);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 	ret = do_writepages(mapping, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	 * Make sure to wait on the data before writing out the metadata.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 	 * This is important for filesystems that modify metadata on data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 	 * I/O completion. We don't do it for sync(2) writeback because it has a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 	 * separate, external IO completion path and ->sync_fs for guaranteeing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	 * inode metadata is written back correctly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 	if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 		int err = filemap_fdatawait(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 		if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 			ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 	 * If the inode has dirty timestamps and we need to write them, call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 	 * mark_inode_dirty_sync() to notify the filesystem about it and to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	 * change I_DIRTY_TIME into I_DIRTY_SYNC.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 	if ((inode->i_state & I_DIRTY_TIME) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 	    (wbc->sync_mode == WB_SYNC_ALL || wbc->for_sync ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	     time_after(jiffies, inode->dirtied_time_when +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 			dirtytime_expire_interval * HZ))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 		trace_writeback_lazytime(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 		mark_inode_dirty_sync(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	 * Some filesystems may redirty the inode during the writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	 * due to delalloc, clear dirty metadata flags right before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	 * write_inode()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	dirty = inode->i_state & I_DIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	inode->i_state &= ~dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	 * Paired with smp_mb() in __mark_inode_dirty().  This allows
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	 * __mark_inode_dirty() to test i_state without grabbing i_lock -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	 * either they see the I_DIRTY bits cleared or we see the dirtied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	 * inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 	 * I_DIRTY_PAGES is always cleared together above even if @mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	 * still has dirty pages.  The flag is reinstated after smp_mb() if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	 * necessary.  This guarantees that either __mark_inode_dirty()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 	 * sees clear I_DIRTY_PAGES or we see PAGECACHE_TAG_DIRTY.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 		inode->i_state |= I_DIRTY_PAGES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 	/* Don't write the inode if only I_DIRTY_PAGES was set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 	if (dirty & ~I_DIRTY_PAGES) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 		int err = write_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 		if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 			ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	trace_writeback_single_inode(inode, wbc, nr_to_write);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) }
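/*
 * [Editorial aid, not part of the original source] Rough order of events in
 * __writeback_single_inode() above, as a reading aid:
 *
 *   do_writepages()           write out dirty pages
 *   filemap_fdatawait()       only for WB_SYNC_ALL and !for_sync
 *   mark_inode_dirty_sync()   promote I_DIRTY_TIME when syncing or expired
 *   clear I_DIRTY + smp_mb()  pairs with the barrier in __mark_inode_dirty()
 *   write_inode()             only if more than I_DIRTY_PAGES was set
 */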
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533)  * Write out an inode's dirty pages. Either the caller has an active reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)  * on the inode or the inode has I_WILL_FREE set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536)  * This function is designed for writing back a single inode on request, e.g.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)  * directly from the filesystem. The flusher thread uses __writeback_single_inode()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538)  * and does more profound writeback list handling in writeback_sb_inodes().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) static int writeback_single_inode(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 				  struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 	struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 	spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 	if (!atomic_read(&inode->i_count))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 		WARN_ON(inode->i_state & I_WILL_FREE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 	if (inode->i_state & I_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 		if (wbc->sync_mode != WB_SYNC_ALL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 		 * It's a data-integrity sync. We must wait. Since callers hold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 		 * inode reference or inode has I_WILL_FREE set, it cannot go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 		 * away under us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 		__inode_wait_for_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 	WARN_ON(inode->i_state & I_SYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 	 * Skip inode if it is clean and we have no outstanding writeback in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 	 * WB_SYNC_ALL mode. We don't want to mess with writeback lists in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 	 * function since the flusher thread may, for example, be doing a sync in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 	 * parallel and, if we moved the inode, it could get skipped. So here we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 	 * make sure inode is on some writeback list and leave it there unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 	 * we have completely cleaned the inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 	if (!(inode->i_state & I_DIRTY_ALL) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 	    (wbc->sync_mode != WB_SYNC_ALL ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 	     !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 	inode->i_state |= I_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 	wbc_attach_and_unlock_inode(wbc, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 	ret = __writeback_single_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 	wbc_detach_inode(wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	wb = inode_to_wb_and_lock_list(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	 * If inode is clean, remove it from writeback lists. Otherwise don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	 * touch it. See comment above for explanation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	if (!(inode->i_state & I_DIRTY_ALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 		inode_io_list_del_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 	spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 	inode_sync_complete(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 	spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) static long writeback_chunk_size(struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 				 struct wb_writeback_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 	long pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 	 * here avoids calling into writeback_inodes_wb() more than once.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 	 * The intended call sequence for WB_SYNC_ALL writeback is:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	 *      wb_writeback()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 	 *          writeback_sb_inodes()       <== called only once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	 *              write_cache_pages()     <== called once for each inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	 *                   (quickly) tag currently dirty pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	 *                   (maybe slowly) sync all tagged pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 		pages = LONG_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 		pages = min(wb->avg_write_bandwidth / 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 			    global_wb_domain.dirty_limit / DIRTY_SCOPE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 		pages = min(pages, work->nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 		pages = round_down(pages + MIN_WRITEBACK_PAGES,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 				   MIN_WRITEBACK_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 	return pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
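/*
 * [Editorial aid, not part of the original source] A worked example for the
 * WB_SYNC_NONE branch above, assuming 4KB pages (PAGE_SHIFT == 12):
 *
 *   MIN_WRITEBACK_PAGES = 4096UL >> (12 - 10) = 1024 pages = 4MB
 *
 * With an illustrative wb->avg_write_bandwidth of 25600 pages/s and a
 * global dirty_limit / DIRTY_SCOPE of 50000 pages (both figures assumed,
 * not taken from the source), the candidate is min(12800, 50000) = 12800,
 * capped by work->nr_pages, then round_down(12800 + 1024, 1024) = 13312.
 * Adding MIN_WRITEBACK_PAGES before rounding keeps the chunk a multiple of
 * 4MB and never lets it drop below 4MB.
 */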
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)  * Write a portion of b_io inodes which belong to @sb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)  * Return the number of pages and/or inodes written.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633)  * NOTE! This is called with wb->list_lock held, and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)  * unlock and relock that for each inode it ends up doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635)  * IO for.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) static long writeback_sb_inodes(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 				struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 				struct wb_writeback_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 	struct writeback_control wbc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 		.sync_mode		= work->sync_mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 		.tagged_writepages	= work->tagged_writepages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 		.for_kupdate		= work->for_kupdate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 		.for_background		= work->for_background,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 		.for_sync		= work->for_sync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 		.range_cyclic		= work->range_cyclic,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 		.range_start		= 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 		.range_end		= LLONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 	unsigned long start_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 	long write_chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 	long wrote = 0;  /* count both pages and inodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 	while (!list_empty(&wb->b_io)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 		struct inode *inode = wb_inode(wb->b_io.prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 		struct bdi_writeback *tmp_wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 		if (inode->i_sb != sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 			if (work->sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 				/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 				 * We only want to write back data for this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 				 * superblock, move all inodes not belonging
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 				 * to it back onto the dirty list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 				 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 				redirty_tail(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 			 * The inode belongs to a different superblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 			 * Bounce back to the caller to unpin this and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 			 * pin the next superblock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 		 * Don't bother with new inodes or inodes being freed: the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 		 * kind does not need periodic writeout yet, and for the latter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 		 * kind writeout is handled by the freer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 		spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 			redirty_tail_locked(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 			spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 		if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 			 * If this inode is locked for writeback and we are not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 			 * doing writeback-for-data-integrity, move it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 			 * b_more_io so that writeback can proceed with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 			 * other inodes on s_io.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 			 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 			 * We'll have another go at writing back this inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 			 * when we have completed a full scan of b_io.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 			spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 			requeue_io(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 			trace_writeback_sb_inodes_requeue(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 		spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 		 * We already requeued the inode if it had I_SYNC set and we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 		 * are doing WB_SYNC_NONE writeback. So this catches only the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 		 * WB_SYNC_ALL case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 		if (inode->i_state & I_SYNC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 			/* Wait for I_SYNC. This function drops i_lock... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 			inode_sleep_on_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 			/* Inode may be gone, start again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 			spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 		inode->i_state |= I_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 		wbc_attach_and_unlock_inode(&wbc, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 		write_chunk = writeback_chunk_size(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 		wbc.nr_to_write = write_chunk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 		wbc.pages_skipped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 		 * We use I_SYNC to pin the inode in memory. While it is set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 		 * evict_inode() will wait so the inode cannot be freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 		__writeback_single_inode(inode, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 		wbc_detach_inode(&wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 		work->nr_pages -= write_chunk - wbc.nr_to_write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 		wrote += write_chunk - wbc.nr_to_write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 		if (need_resched()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 			 * We're trying to balance between building up a nice
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 			 * long list of IOs to improve our merge rate, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 			 * getting those IOs out quickly for anyone throttling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 			 * in balance_dirty_pages().  cond_resched() doesn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 			 * unplug, so get our IOs out the door before we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 			 * give up the CPU.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 			blk_flush_plug(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 			cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 		 * Requeue @inode if still dirty.  Be careful as @inode may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 		 * have been switched to another wb in the meantime.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 		tmp_wb = inode_to_wb_and_lock_list(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 		spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 		if (!(inode->i_state & I_DIRTY_ALL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 			wrote++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 		requeue_inode(inode, tmp_wb, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 		inode_sync_complete(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 		spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 		if (unlikely(tmp_wb != wb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 			spin_unlock(&tmp_wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 			spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 		 * bail out to wb_writeback() often enough to check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 		 * background threshold and other termination conditions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 		if (wrote) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 			if (time_is_before_jiffies(start_time + HZ / 10UL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 			if (work->nr_pages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	return wrote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) }
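/*
 * [Editorial aid, not part of the original source] Per-inode decisions in the
 * writeback_sb_inodes() loop above, summarized:
 *
 *   wrong superblock             redirty_tail() if work->sb is set, else bail
 *   I_NEW/I_FREEING/I_WILL_FREE  redirty_tail_locked(), skip
 *   I_SYNC and WB_SYNC_NONE      requeue_io(), revisit after this b_io scan
 *   I_SYNC and WB_SYNC_ALL       inode_sleep_on_writeback(), then retry
 *   otherwise                    set I_SYNC and __writeback_single_inode()
 */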
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) static long __writeback_inodes_wb(struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 				  struct wb_writeback_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 	unsigned long start_time = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 	long wrote = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 	while (!list_empty(&wb->b_io)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 		struct inode *inode = wb_inode(wb->b_io.prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 		struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 		if (!trylock_super(sb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 			 * trylock_super() may fail consistently due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 			 * s_umount being grabbed by someone else. Don't use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 			 * requeue_io() to avoid busy retrying the inode/sb.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 			redirty_tail(inode, wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 		wrote += writeback_sb_inodes(sb, wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 		up_read(&sb->s_umount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 		/* refer to the same tests at the end of writeback_sb_inodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 		if (wrote) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 			if (time_is_before_jiffies(start_time + HZ / 10UL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 			if (work->nr_pages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 	/* Leave any unwritten inodes on b_io */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 	return wrote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 				enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 		.nr_pages	= nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 		.sync_mode	= WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 		.range_cyclic	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 		.reason		= reason,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 	struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	if (list_empty(&wb->b_io))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 		queue_io(wb, &work, jiffies);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 	__writeback_inodes_wb(wb, &work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 	spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 	blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 	return nr_pages - work.nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) }
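/*
 * [Editorial aid, not part of the original source] The return value above is
 * the number of pages actually written (nr_pages minus whatever is left in
 * work.nr_pages).  This WB_SYNC_NONE helper is the bounded path used from
 * wb_workfn() below when running off the workqueue rescuer.
 */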
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836)  * Explicit flushing or periodic writeback of "old" data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838)  * Define "old": the first time one of an inode's pages is dirtied, we mark the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839)  * dirtying-time in the inode's address_space.  So this periodic writeback code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)  * just walks the superblock inode list, writing back any inodes which are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841)  * older than a specific point in time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843)  * Try to run once per dirty_writeback_interval.  But if a writeback event
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844)  * takes longer than one dirty_writeback_interval, then leave a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845)  * one-second gap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)  * dirtied_before takes precedence over nr_to_write.  So we'll only write back
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848)  * all dirty pages if they are all attached to "old" mappings.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) static long wb_writeback(struct bdi_writeback *wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 			 struct wb_writeback_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 	unsigned long wb_start = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 	long nr_pages = work->nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 	unsigned long dirtied_before = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 	struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 	long progress;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	struct blk_plug plug;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	blk_start_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 	for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 		 * Stop writeback when nr_pages has been consumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 		if (work->nr_pages <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 		 * Background writeout and kupdate-style writeback may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 		 * run forever. Stop them if there is other work to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 		 * so that e.g. sync can proceed. They'll be restarted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 		 * after the other work items are all done.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 		if ((work->for_background || work->for_kupdate) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 		    !list_empty(&wb->work_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 		 * For background writeout, stop when we are below the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 		 * background dirty threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 		if (work->for_background && !wb_over_bg_thresh(wb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 		 * Kupdate and background work items are special and we want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 		 * include all inodes that need writing. Livelock avoidance is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 		 * handled by these work items yielding to any other work so we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 		 * safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 		if (work->for_kupdate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 			dirtied_before = jiffies -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 				msecs_to_jiffies(dirty_expire_interval * 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 		} else if (work->for_background)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 			dirtied_before = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 		trace_writeback_start(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 		if (list_empty(&wb->b_io))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 			queue_io(wb, work, dirtied_before);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 		if (work->sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 			progress = writeback_sb_inodes(work->sb, wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 			progress = __writeback_inodes_wb(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 		trace_writeback_written(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 		wb_update_bandwidth(wb, wb_start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 		 * Did we write something? Try for more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 		 * Dirty inodes are moved to b_io for writeback in batches.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 		 * The completion of the current batch does not necessarily
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 		 * mean the overall work is done. So we keep looping as long
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 		 * as we made some progress on cleaning pages or inodes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 		if (progress)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 		 * No more inodes for IO, bail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 		if (list_empty(&wb->b_more_io))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 		 * Nothing written. Wait for some inode to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 		 * become available for writeback. Otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 		 * we'll just busyloop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 		trace_writeback_wait(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 		inode = wb_inode(wb->b_more_io.prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 		spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 		spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 		/* This function drops i_lock... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 		inode_sleep_on_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 		spin_lock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 	spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	blk_finish_plug(&plug);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 	return nr_pages - work->nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) }
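/*
 * [Editorial aid, not part of the original source] On the dirtied_before
 * cutoff above: dirty_expire_interval is kept in centiseconds, so the "* 10"
 * converts it to milliseconds for msecs_to_jiffies(); with the usual default
 * of 3000 centiseconds (assumed here, tunable via sysctl) kupdate-style
 * writeback targets inodes dirtied more than roughly 30 seconds ago, while
 * background writeback uses jiffies itself, i.e. any dirty inode qualifies.
 */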
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)  * Return the next wb_writeback_work struct that hasn't been processed yet.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) static struct wb_writeback_work *get_next_work_item(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 	struct wb_writeback_work *work = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 	spin_lock_bh(&wb->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	if (!list_empty(&wb->work_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 		work = list_entry(wb->work_list.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 				  struct wb_writeback_work, list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 		list_del_init(&work->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 	spin_unlock_bh(&wb->work_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	return work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) static long wb_check_background_flush(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 	if (wb_over_bg_thresh(wb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 		struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 			.nr_pages	= LONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 			.sync_mode	= WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 			.for_background	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 			.range_cyclic	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 			.reason		= WB_REASON_BACKGROUND,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 		};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 		return wb_writeback(wb, &work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) static long wb_check_old_data_flush(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 	unsigned long expired;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 	long nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 	 * When set to zero, disable periodic writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 	if (!dirty_writeback_interval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	expired = wb->last_old_flush +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 			msecs_to_jiffies(dirty_writeback_interval * 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 	if (time_before(jiffies, expired))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 	wb->last_old_flush = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	nr_pages = get_nr_dirty_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 	if (nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 		struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 			.nr_pages	= nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 			.sync_mode	= WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 			.for_kupdate	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 			.range_cyclic	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 			.reason		= WB_REASON_PERIODIC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 		};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 		return wb_writeback(wb, &work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) }
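/*
 * [Editorial aid, not part of the original source] Same unit convention as
 * above: dirty_writeback_interval is in centiseconds (the
 * dirty_writeback_centisecs sysctl), so "* 10" yields milliseconds; a typical
 * default of 500 centiseconds (assumed) gives one periodic flush attempt
 * roughly every 5 seconds, and setting it to zero disables the periodic path
 * entirely, as checked at the top of the function.
 */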
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) static long wb_check_start_all(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 	long nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 	if (!test_bit(WB_start_all, &wb->state))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 	nr_pages = get_nr_dirty_pages();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 	if (nr_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 		struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 			.nr_pages	= wb_split_bdi_pages(wb, nr_pages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 			.sync_mode	= WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 			.range_cyclic	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 			.reason		= wb->start_all_reason,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 		};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 		nr_pages = wb_writeback(wb, &work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	clear_bit(WB_start_all, &wb->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 	return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037)  * Retrieve work items and do the writeback they describe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) static long wb_do_writeback(struct bdi_writeback *wb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 	struct wb_writeback_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 	long wrote = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 	set_bit(WB_writeback_running, &wb->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	while ((work = get_next_work_item(wb)) != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 		trace_writeback_exec(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 		wrote += wb_writeback(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 		finish_writeback_work(wb, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	 * Check for a flush-everything request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 	wrote += wb_check_start_all(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 	 * Check for periodic writeback, kupdated() style
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 	wrote += wb_check_old_data_flush(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 	wrote += wb_check_background_flush(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	clear_bit(WB_writeback_running, &wb->state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	return wrote;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) }
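/*
 * [Editorial aid, not part of the original source] Ordering in
 * wb_do_writeback() above: explicitly queued work items are drained first,
 * then any pending WB_start_all request, then kupdate-style periodic
 * writeback, and finally background flushing, all bracketed by the
 * WB_writeback_running state bit.
 */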
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067)  * Handle writeback of dirty data for the device backed by this bdi. Also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)  * reschedules periodically and does kupdated style flushing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) void wb_workfn(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 	struct bdi_writeback *wb = container_of(to_delayed_work(work),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 						struct bdi_writeback, dwork);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	long pages_written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	set_worker_desc("flush-%s", bdi_dev_name(wb->bdi));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 	current->flags |= PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 	if (likely(!current_is_workqueue_rescuer() ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 		   !test_bit(WB_registered, &wb->state))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 		 * The normal path.  Keep writing back @wb until its
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 		 * work_list is empty.  Note that this path is also taken
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 		 * if @wb is shutting down even when we're running off the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 		 * rescuer as work_list needs to be drained.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 			pages_written = wb_do_writeback(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 			trace_writeback_pages_written(pages_written);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 		} while (!list_empty(&wb->work_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 		 * bdi_wq can't get enough workers and we're running off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 		 * the emergency worker.  Don't hog it.  Hopefully, 1024 is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 		 * enough for efficient IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 		pages_written = writeback_inodes_wb(wb, 1024,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 						    WB_REASON_FORKER_THREAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 		trace_writeback_pages_written(pages_written);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 	if (!list_empty(&wb->work_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 		wb_wakeup(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 	else if (wb_has_dirty_io(wb) && dirty_writeback_interval)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 		wb_wakeup_delayed(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	current->flags &= ~PF_SWAPWRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) }
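/*
 * [Editorial aid, not part of the original source] The wakeup logic at the
 * end of wb_workfn(): leftover work items trigger an immediate wb_wakeup();
 * otherwise, if there is still dirty IO and periodic writeback is enabled,
 * wb_wakeup_delayed() is presumed to re-arm the worker after roughly one
 * dirty_writeback_interval.
 */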
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111)  * Start writeback of `nr_pages' pages on this bdi. If `nr_pages' is zero,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112)  * write back the whole world.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) static void __wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 					 enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 	struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 	if (!bdi_has_dirty_io(bdi))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 		wb_start_writeback(wb, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 				enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 	__wakeup_flusher_threads_bdi(bdi, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)  * Wake up the flusher threads to start writeback of all currently dirty pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) void wakeup_flusher_threads(enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 	struct backing_dev_info *bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 	 * If we are expecting writeback progress we must submit plugged IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 	if (blk_needs_flush_plug(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 		blk_schedule_flush_plug(current);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 		__wakeup_flusher_threads_bdi(bdi, reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154)  * Wake up bdi's periodically to make sure dirtytime inodes get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155)  * written back periodically.  We deliberately do *not* check the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)  * b_dirtytime list in wb_has_dirty_io(), since this would cause the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157)  * kernel to be constantly waking up once there are any dirtytime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158)  * inodes on the system.  So instead we define a separate delayed work
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159)  * function which gets called much more rarely.  (By default, only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)  * once every 12 hours.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162)  * If there is any other write activity going on in the file system,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)  * this function won't be necessary.  But if the only thing that has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)  * happened on the file system is a dirtytime inode caused by an atime
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165)  * update, we need this infrastructure below to make sure that inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166)  * eventually gets pushed out to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) static void wakeup_dirtytime_writeback(struct work_struct *w);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) static DECLARE_DELAYED_WORK(dirtytime_work, wakeup_dirtytime_writeback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) static void wakeup_dirtytime_writeback(struct work_struct *w)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 	struct backing_dev_info *bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 		struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 		list_for_each_entry_rcu(wb, &bdi->wb_list, bdi_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 			if (!list_empty(&wb->b_dirty_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 				wb_wakeup(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) static int __init start_dirtytime_writeback(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 	schedule_delayed_work(&dirtytime_work, dirtytime_expire_interval * HZ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) __initcall(start_dirtytime_writeback);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) int dirtytime_interval_handler(struct ctl_table *table, int write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 			       void *buffer, size_t *lenp, loff_t *ppos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 	if (ret == 0 && write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 		mod_delayed_work(system_wq, &dirtytime_work, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) }
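/*
 * [Editorial aid, not part of the original source] dirtytime_expire_interval
 * is measured in seconds (it is multiplied by HZ above) and is typically
 * exposed as the vm.dirtytime_expire_seconds sysctl; when a new value is
 * written, mod_delayed_work(..., 0) fires dirtytime_work immediately so the
 * new interval takes effect without waiting out the old one.
 */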
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206)  * __mark_inode_dirty -	internal function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208)  * @inode: inode to mark
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)  * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211)  * Mark an inode as dirty. Callers should use mark_inode_dirty or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)  * mark_inode_dirty_sync.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214)  * Put the inode on the super block's dirty list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216)  * CAREFUL! We mark it dirty unconditionally, but move it onto the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)  * dirty list only if it is hashed or if it refers to a blockdev.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218)  * If it was not hashed, it will never be added to the dirty list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)  * even if it is later hashed, as it will have been marked dirty already.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)  * In short, make sure you hash any inodes _before_ you start marking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222)  * them dirty.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224)  * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)  * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226)  * the kernel-internal blockdev inode represents the dirtying time of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)  * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)  * page->mapping->host, so the page-dirtying time is recorded in the internal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)  * blockdev inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) void __mark_inode_dirty(struct inode *inode, int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	struct super_block *sb = inode->i_sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 	int dirtytime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 	trace_writeback_mark_inode_dirty(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 	 * dirty the inode itself
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 	if (flags & (I_DIRTY_INODE | I_DIRTY_TIME)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 		trace_writeback_dirty_inode_start(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 		if (sb->s_op->dirty_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 			sb->s_op->dirty_inode(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 		trace_writeback_dirty_inode(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 	if (flags & I_DIRTY_INODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 		flags &= ~I_DIRTY_TIME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 	dirtytime = flags & I_DIRTY_TIME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 	 * Paired with smp_mb() in __writeback_single_inode() for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 	 * following lockless i_state test.  See there for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 	smp_mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 	if (((inode->i_state & flags) == flags) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 	    (dirtytime && (inode->i_state & I_DIRTY_INODE)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 	spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 	if (dirtytime && (inode->i_state & I_DIRTY_INODE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 		goto out_unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 	if ((inode->i_state & flags) != flags) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 		const int was_dirty = inode->i_state & I_DIRTY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 		inode_attach_wb(inode, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 		if (flags & I_DIRTY_INODE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 			inode->i_state &= ~I_DIRTY_TIME;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 		inode->i_state |= flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 		 * If the inode is queued for writeback by flush worker, just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 		 * update its dirty state. Once the flush worker is done with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 		 * the inode it will place it on the appropriate superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 		 * list, based upon its state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 		if (inode->i_state & I_SYNC_QUEUED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 			goto out_unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 		 * Only add valid (hashed) inodes to the superblock's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 		 * dirty list.  Add blockdev inodes as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 		if (!S_ISBLK(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 			if (inode_unhashed(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 				goto out_unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 		if (inode->i_state & I_FREEING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 			goto out_unlock_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 		 * If the inode was already on b_dirty/b_io/b_more_io, don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 		 * reposition it (that would break b_dirty time-ordering).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 		if (!was_dirty) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 			struct bdi_writeback *wb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 			struct list_head *dirty_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 			bool wakeup_bdi = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 			wb = locked_inode_to_wb_and_lock_list(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 			WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 			     !test_bit(WB_registered, &wb->state),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 			     "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 			inode->dirtied_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 			if (dirtytime)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 				inode->dirtied_time_when = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 			if (inode->i_state & I_DIRTY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 				dirty_list = &wb->b_dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 				dirty_list = &wb->b_dirty_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 			wakeup_bdi = inode_io_list_move_locked(inode, wb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 							       dirty_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 			spin_unlock(&wb->list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 			trace_writeback_dirty_inode_enqueue(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 			 * If this is the first dirty inode for this bdi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 			 * we have to wake up the corresponding bdi thread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 			 * to make sure background write-back happens
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 			 * later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 			if (wakeup_bdi &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 			    (wb->bdi->capabilities & BDI_CAP_WRITEBACK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 				wb_wakeup_delayed(wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) out_unlock_inode:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 	spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) EXPORT_SYMBOL_NS(__mark_inode_dirty, ANDROID_GKI_VFS_EXPORT_ONLY);
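
/*
 * Illustrative sketch, not part of fs-writeback.c: filesystems normally reach
 * __mark_inode_dirty() through the mark_inode_dirty()/mark_inode_dirty_sync()
 * wrappers declared in <linux/fs.h>. The helper below is hypothetical and only
 * shows the common pattern of updating in-core fields and then marking the
 * inode dirty.
 */
static void example_update_and_dirty(struct inode *inode, loff_t new_size)
{
	i_size_write(inode, new_size);
	inode->i_mtime = inode->i_ctime = current_time(inode);

	/* I_DIRTY_INODE | I_DIRTY_PAGES: inode and its data need writeback. */
	mark_inode_dirty(inode);

	/* A timestamp-only change could use mark_inode_dirty_sync() instead. */
}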
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)  * The @s_sync_lock is used to serialise concurrent sync operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345)  * to avoid lock contention problems with concurrent wait_sb_inodes() calls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346)  * Concurrent callers will block on the s_sync_lock rather than doing contending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347)  * walks. The queueing maintains sync(2) required behaviour as all the IO that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348)  * has been issued up to the time this function is entered is guaranteed to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349)  * completed by the time we have gained the lock and waited for all IO that is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350)  * in progress regardless of the order callers are granted the lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) static void wait_sb_inodes(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 	LIST_HEAD(sync_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 	 * We need to be protected against the filesystem going from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 	 * r/o to r/w or vice versa.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 	mutex_lock(&sb->s_sync_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 	 * Splice the writeback list onto a temporary list to avoid waiting on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 	 * inodes that have started writeback after this point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 	 * Use rcu_read_lock() to keep the inodes around until we have a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 	 * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 	 * the local list because inodes can be dropped from either by writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 	 * completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 	spin_lock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 	list_splice_init(&sb->s_inodes_wb, &sync_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 	 * Data integrity sync. Must wait for all pages under writeback, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 	 * there may have been pages dirtied before our sync call whose writeout
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 	 * was already started (e.g. by background writeback) before we got to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 	 * them.  In that case the inode may no longer be on the dirty list, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 	 * we still have to wait for that writeout.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 	while (!list_empty(&sync_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 		struct inode *inode = list_first_entry(&sync_list, struct inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 						       i_wb_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 		struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 		 * Move each inode back to the wb list before we drop the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 		 * to preserve consistency between i_wb_list and the mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 		 * writeback tag. Writeback completion is responsible for removing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 		 * the inode from either list once the writeback tag is cleared.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 		list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 		 * The mapping can appear untagged while still on-list since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 		 * do not have the mapping lock. Skip it here, wb completion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 		 * will remove it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 		if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 		spin_unlock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 		spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 		if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 			spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 			spin_lock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 		__iget(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 		spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 		 * We keep the error status of individual mapping so that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 		 * applications can catch the writeback error using fsync(2).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 		 * See filemap_fdatawait_keep_errors() for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 		filemap_fdatawait_keep_errors(mapping);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 		iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 		spin_lock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 	spin_unlock_irq(&sb->s_inode_wblist_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 	mutex_unlock(&sb->s_sync_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) static void __writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 				     enum wb_reason reason, bool skip_if_busy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 	struct backing_dev_info *bdi = sb->s_bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 	DEFINE_WB_COMPLETION(done, bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 	struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 		.sb			= sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 		.sync_mode		= WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 		.tagged_writepages	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 		.done			= &done,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 		.nr_pages		= nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 		.reason			= reason,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 	if (!bdi_has_dirty_io(bdi) || bdi == &noop_backing_dev_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 	bdi_split_work_to_wbs(sb->s_bdi, &work, skip_if_busy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 	wb_wait_for_completion(&done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460)  * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461)  * @sb: the superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462)  * @nr: the number of pages to write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463)  * @reason: reason why some writeback work was initiated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465)  * Start writeback on some inodes on this super_block. No guarantees are made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466)  * on how many (if any) will be written, and this function does not wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467)  * for IO completion of submitted IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) void writeback_inodes_sb_nr(struct super_block *sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 			    unsigned long nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 			    enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 	__writeback_inodes_sb_nr(sb, nr, reason, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) EXPORT_SYMBOL(writeback_inodes_sb_nr);
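
/*
 * Illustrative sketch, not part of fs-writeback.c: a caller that wants a
 * bounded flush of one superblock, without waiting for the submitted IO to
 * complete, must hold s_umount (see the WARN_ON in __writeback_inodes_sb_nr()).
 * The helper name and the WB_REASON_FS_FREE_SPACE choice are just examples.
 */
static void example_kick_sb_writeback(struct super_block *sb,
				      unsigned long nr_pages)
{
	down_read(&sb->s_umount);
	writeback_inodes_sb_nr(sb, nr_pages, WB_REASON_FS_FREE_SPACE);
	up_read(&sb->s_umount);
}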
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478)  * writeback_inodes_sb	-	writeback dirty inodes from given super_block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479)  * @sb: the superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480)  * @reason: reason why some writeback work was initiated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482)  * Start writeback on some inodes on this super_block. No guarantees are made
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483)  * on how many (if any) will be written, and this function does not wait
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484)  * for IO completion of submitted IO.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) EXPORT_SYMBOL(writeback_inodes_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493)  * try_to_writeback_inodes_sb - try to start writeback if none underway
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494)  * @sb: the superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495)  * @reason: reason why some writeback work was initiated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497)  * Invoke __writeback_inodes_sb_nr if no writeback is currently underway.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) 	if (!down_read_trylock(&sb->s_umount))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 	__writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 	up_read(&sb->s_umount);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) EXPORT_SYMBOL_NS(try_to_writeback_inodes_sb, ANDROID_GKI_VFS_EXPORT_ONLY);
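
/*
 * Illustrative sketch, not part of fs-writeback.c: because
 * try_to_writeback_inodes_sb() only trylocks s_umount itself, it can be
 * called opportunistically from filesystem code that notices pressure,
 * e.g. a delalloc path running low on free space (ext4 does something
 * along these lines). The helper below is hypothetical.
 */
static void example_low_space_writeback(struct super_block *sb)
{
	try_to_writeback_inodes_sb(sb, WB_REASON_FS_FREE_SPACE);
}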
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510)  * sync_inodes_sb	-	sync sb inode pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511)  * @sb: the superblock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513)  * This function writes and waits on any dirty inode belonging to this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514)  * super_block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) void sync_inodes_sb(struct super_block *sb)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 	struct backing_dev_info *bdi = sb->s_bdi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 	DEFINE_WB_COMPLETION(done, bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 	struct wb_writeback_work work = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 		.sb		= sb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 		.sync_mode	= WB_SYNC_ALL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 		.nr_pages	= LONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 		.range_cyclic	= 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 		.done		= &done,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 		.reason		= WB_REASON_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 		.for_sync	= 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 	 * Can't skip on !bdi_has_dirty() because we should wait for !dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 	 * inodes under writeback and I_DIRTY_TIME inodes ignored by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 	 * bdi_has_dirty() need to be written out too.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 	if (bdi == &noop_backing_dev_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 	/* protect against inode wb switch, see inode_switch_wbs_work_fn() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 	bdi_down_write_wb_switch_rwsem(bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 	bdi_split_work_to_wbs(bdi, &work, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 	wb_wait_for_completion(&done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 	bdi_up_write_wb_switch_rwsem(bdi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 	wait_sb_inodes(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) EXPORT_SYMBOL(sync_inodes_sb);
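
/*
 * Illustrative sketch, not part of fs-writeback.c: the sync(2) path in
 * fs/sync.c calls sync_inodes_sb() for every writable superblock via
 * iterate_supers(), which holds sb->s_umount for read around the callback
 * and so satisfies the WARN_ON above. Shown approximately:
 */
static void example_sync_one_sb(struct super_block *sb, void *arg)
{
	if (!sb_rdonly(sb))
		sync_inodes_sb(sb);
}
/* ...later:  iterate_supers(example_sync_one_sb, NULL); */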
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550)  * write_inode_now	-	write an inode to disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551)  * @inode: inode to write to disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552)  * @sync: whether the write should be synchronous or not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554)  * This function commits an inode to disk immediately if it is dirty. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)  * primarily needed by knfsd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557)  * The caller must either have a ref on the inode or must have set I_WILL_FREE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) int write_inode_now(struct inode *inode, int sync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 	struct writeback_control wbc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 		.nr_to_write = LONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) 		.range_start = 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 		.range_end = LLONG_MAX,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) 	if (!mapping_can_writeback(inode->i_mapping))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 		wbc.nr_to_write = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 	might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 	return writeback_single_inode(inode, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) EXPORT_SYMBOL_NS(write_inode_now, ANDROID_GKI_VFS_EXPORT_ONLY);
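
/*
 * Illustrative sketch, not part of fs-writeback.c: a typical caller flushes
 * a dirty inode synchronously at a point where it must be on disk, relying
 * on the reference it already holds (see the kernel-doc above). The helper
 * is hypothetical.
 */
static int example_flush_inode(struct inode *inode)
{
	/* sync == 1: write data and inode, waiting for the IO to finish. */
	return write_inode_now(inode, 1);
}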
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577)  * sync_inode - write an inode and its pages to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578)  * @inode: the inode to sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579)  * @wbc: controls the writeback mode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581)  * sync_inode() will write an inode and its pages to disk.  It will also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582)  * correctly update the inode on its superblock's dirty inode lists and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583)  * update inode->i_state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585)  * The caller must have a ref on the inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) int sync_inode(struct inode *inode, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 	return writeback_single_inode(inode, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) EXPORT_SYMBOL(sync_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)  * sync_inode_metadata - write an inode to disk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595)  * @inode: the inode to sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596)  * @wait: wait for I/O to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598)  * Write an inode to disk and adjust its dirty state after completion.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600)  * Note: only writes the actual inode, no associated data or other metadata.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) int sync_inode_metadata(struct inode *inode, int wait)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 	struct writeback_control wbc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 		.nr_to_write = 0, /* metadata-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 	return sync_inode(inode, &wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) EXPORT_SYMBOL_NS(sync_inode_metadata, ANDROID_GKI_VFS_EXPORT_ONLY);
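
/*
 * Illustrative sketch, not part of fs-writeback.c: a minimal ->fsync()
 * built on the helpers above first writes and waits on the data pages,
 * then pushes the inode itself with sync_inode_metadata(), roughly the
 * shape of __generic_file_fsync() in fs/libfs.c. The helper below is
 * hypothetical and omits the locking and device cache flush a real
 * implementation needs.
 */
static int example_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct inode *inode = file_inode(file);
	int err;

	err = file_write_and_wait_range(file, start, end);
	if (err)
		return err;

	/* wait == 1: also wait for the inode write to complete. */
	return sync_inode_metadata(inode, 1);
}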