Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) #include <linux/ceph/ceph_debug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8) #include <linux/writeback.h>	/* generic_writepages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10) #include <linux/pagevec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) #include <linux/task_io_accounting_ops.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #include <linux/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) #include <linux/iversion.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) #include <linux/ktime.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) #include "super.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) #include "mds_client.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include "cache.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) #include "metric.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #include <linux/ceph/osd_client.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <linux/ceph/striper.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24)  * Ceph address space ops.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26)  * There are a few funny things going on here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28)  * The page->private field is used to reference a struct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29)  * ceph_snap_context for _every_ dirty page.  This indicates which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30)  * snapshot the page was logically dirtied in, and thus which snap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31)  * context needs to be associated with the osd write during writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33)  * Similarly, struct ceph_inode_info maintains a set of counters to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34)  * count dirty pages on the inode.  In the absence of snapshots,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35)  * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37)  * When a snapshot is taken (that is, when the client receives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38)  * notification that a snapshot was taken), each inode with caps and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39)  * with dirty pages (dirty pages implies there is a cap) gets a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40)  * ceph_cap_snap in the i_cap_snaps list (which is sorted in ascending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41)  * order, new snaps go to the tail).  The i_wrbuffer_ref_head count is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42)  * moved to capsnap->dirty. (Unless a sync write is currently in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43)  * progress.  In that case, the capsnap is said to be "pending", new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44)  * writes cannot start, and the capsnap isn't "finalized" until the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45)  * write completes (or fails) and a final size/mtime for the inode for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46)  * that snap can be settled upon.)  i_wrbuffer_ref_head is reset to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48)  * On writeback, we must submit writes to the osd IN SNAP ORDER.  So,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49)  * we look for the first capsnap in i_cap_snaps and write out pages in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50)  * that snap context _only_.  Then we move on to the next capsnap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51)  * eventually reaching the "live" or "head" context (i.e., pages that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52)  * are not yet snapped) and are writing the most recently dirtied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53)  * pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55)  * Invalidate and so forth must take care to ensure the dirty page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56)  * accounting is preserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) 
/*
 * Writeback congestion thresholds.
 *
 * CONGESTION_ON_THRESH shifts a kilobyte count right by (PAGE_SHIFT - 10),
 * i.e. converts it into a page count.  CONGESTION_OFF_THRESH is that page
 * count minus a quarter of itself.
 *
 * The argument is parenthesized so that any expression may be passed.
 * Note that CONGESTION_OFF_THRESH evaluates its argument twice, so it must
 * not have side effects.
 */
#define CONGESTION_ON_THRESH(congestion_kb)				\
	((congestion_kb) >> (PAGE_SHIFT - 10))
#define CONGESTION_OFF_THRESH(congestion_kb)				\
	(CONGESTION_ON_THRESH(congestion_kb) -				\
	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) static inline struct ceph_snap_context *page_snap_context(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) 	if (PagePrivate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) 		return (void *)page->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72)  * Dirty a page.  Optimistically adjust accounting, on the assumption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73)  * that we won't race with invalidate.  If we do, readjust.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) static int ceph_set_page_dirty(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) 	struct address_space *mapping = page->mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 	struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) 	struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) 	struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 	if (PageDirty(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) 		dout("%p set_page_dirty %p idx %lu -- already dirty\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 		     mapping->host, page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) 		BUG_ON(!PagePrivate(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) 	inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 	ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 	/* dirty the head */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 	BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 	if (__ceph_have_pending_cap_snap(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 		struct ceph_cap_snap *capsnap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 				list_last_entry(&ci->i_cap_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 						struct ceph_cap_snap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 						ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) 		snapc = ceph_get_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 		capsnap->dirty_pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 		BUG_ON(!ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) 		snapc = ceph_get_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 		++ci->i_wrbuffer_ref_head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 	if (ci->i_wrbuffer_ref == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 		ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	++ci->i_wrbuffer_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 	     "snapc %p seq %lld (%d snaps)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 	     mapping->host, page, page->index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 	     ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 	     ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	     snapc, snapc->seq, snapc->num_snaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 	 * Reference snap context in page->private.  Also set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	 * PagePrivate so that we get invalidatepage callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	BUG_ON(PagePrivate(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) 	page->private = (unsigned long)snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 	SetPagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 	return __set_page_dirty_nobuffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130)  * If we are truncating the full page (i.e. offset == 0), adjust the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131)  * dirty page counters appropriately.  Only called if there is private
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132)  * data on the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) static void ceph_invalidatepage(struct page *page, unsigned int offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 				unsigned int length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) 	struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 	struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) 	struct ceph_snap_context *snapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) 	inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 	if (offset != 0 || length != PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) 		dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 		     inode, page, page->index, offset, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 	ceph_invalidate_fscache_page(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	WARN_ON(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 	if (!PagePrivate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 	dout("%p invalidatepage %p idx %lu full dirty page\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) 	     inode, page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) 	ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 	page->private = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 	ClearPagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) static int ceph_releasepage(struct page *page, gfp_t g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 	dout("%p releasepage %p idx %lu (%sdirty)\n", page->mapping->host,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) 	     page, page->index, PageDirty(page) ? "" : "not ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 	/* Can we release the page from the cache? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 	if (!ceph_release_fscache_page(page, g))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 	return !PagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) /* read a single page, without unlocking it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) static int ceph_do_readpage(struct file *filp, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 	struct inode *inode = file_inode(filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 	struct ceph_osd_client *osdc = &fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 	struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 	struct ceph_vino vino = ceph_vino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 	int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 	u64 off = page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 	u64 len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 	if (off >= i_size_read(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 		zero_user_segment(page, 0, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 		SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) 	if (ci->i_inline_version != CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 		 * Uptodate inline data should have been added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) 		 * into page cache while getting Fcr caps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 		if (off == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 		zero_user_segment(page, 0, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 		SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	err = ceph_readpage_from_fscache(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 	if (err == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 		return -EINPROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 	dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 	     vino.ino, vino.snap, filp, off, len, page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 0, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 				    CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 				    ci->i_truncate_seq, ci->i_truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 				    false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 	if (IS_ERR(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 		return PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 	osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 	err = ceph_osdc_start_request(osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 		err = ceph_osdc_wait_request(osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 	ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 				 req->r_end_latency, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 	ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 	dout("readpage result %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	if (err == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 		err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 	if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 		ceph_fscache_readpage_cancel(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 		if (err == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 			fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 	if (err < PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 		/* zero fill remainder of page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 		zero_user_segment(page, err, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 		flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	ceph_readpage_to_fscache(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	return err < 0 ? err : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) static int ceph_readpage(struct file *filp, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 	int r = ceph_do_readpage(filp, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 	if (r != -EINPROGRESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 		r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 	return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265)  * Finish an async read(ahead) op.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) static void finish_read(struct ceph_osd_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 	struct inode *inode = req->r_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 	struct ceph_osd_data *osd_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 	int rc = req->r_result <= 0 ? req->r_result : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 	int bytes = req->r_result >= 0 ? req->r_result : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 	int num_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 	if (rc == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 		ceph_inode_to_client(inode)->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	/* unlock all pages, zeroing any data we didn't read */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 	osd_data = osd_req_op_extent_osd_data(req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 	num_pages = calc_pages_for((u64)osd_data->alignment,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 					(u64)osd_data->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 	for (i = 0; i < num_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 		struct page *page = osd_data->pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 		if (rc < 0 && rc != -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 			ceph_fscache_readpage_cancel(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 			goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 		if (bytes < (int)PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 			/* zero (remainder of) page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 			int s = bytes < 0 ? 0 : bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 			zero_user_segment(page, s, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298)  		dout("finish_read %p uptodate %p idx %lu\n", inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 		     page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 		flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 		SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 		ceph_readpage_to_fscache(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 		put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 		bytes -= PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 	ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 				 req->r_end_latency, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 	kfree(osd_data->pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316)  * start an async read(ahead) operation.  return nr_pages we submitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317)  * a read for on success, or negative error code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318)  */
/*
 * @inode:     inode the pages belong to
 * @rw_ctx:    when NULL, cap references are acquired here with
 *             ceph_try_get_caps() and dropped again before returning
 * @page_list: pages to read; the batch is taken from the tail of the
 *             list (page_list->prev) for as long as the page indices
 *             are consecutive
 * @max:       cap on the number of pages in the request; 0 means no cap
 *
 * If the caps cannot be obtained, every page on @page_list is released
 * and the (<= 0) result is returned.  The request completes in
 * finish_read().
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 		      struct list_head *page_list, int max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 	struct ceph_osd_client *osdc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 		&ceph_inode_to_client(inode)->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 	struct page *page = lru_to_page(page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 	struct ceph_vino vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 	struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 	u64 off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 	u64 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 	struct page **pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) 	pgoff_t next_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 	int nr_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 	int got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	if (!rw_ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 		/* caller of readpages does not hold buffer and read caps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 		 * (fadvise, madvise and readahead cases) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 		int want = CEPH_CAP_FILE_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 		ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 					true, &got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 			dout("start_read %p, error getting cap\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 		} else if (!(got & want)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 			dout("start_read %p, no cache cap\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 			ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 		}
		/*
		 * Error, or the cache cap was not granted: give back
		 * whatever caps we did get and release every page still
		 * queued on page_list.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 		if (ret <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 			if (got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 				ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 			while (!list_empty(page_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 				page = lru_to_page(page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 				list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 				put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 
	/* the read starts at the file offset of the first page taken */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 	off = (u64) page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 	/* count pages */
	/*
	 * Walk the list backwards and stop at the first gap in the page
	 * indices, or once max pages have been counted (when max != 0).
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 	next_index = page->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 	list_for_each_entry_reverse(page, page_list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 		if (page->index != next_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 		nr_pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 		next_index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 		if (max && nr_pages == max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 	len = nr_pages << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 	dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 	     off, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 	vino = ceph_vino(inode);
	/*
	 * len is passed by pointer and nr_pages is recomputed from it
	 * below, so the request may end up covering fewer pages than were
	 * counted.  NOTE(review): presumably len is clamped to an object
	 * boundary by ceph_osdc_new_request() -- confirm.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 				    0, 1, CEPH_OSD_OP_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 				    CEPH_OSD_FLAG_READ, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 				    ci->i_truncate_seq, ci->i_truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 				    false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 	if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 		ret = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 	/* build page vector */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 	nr_pages = calc_pages_for(0, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 	pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	if (!pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 		ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 		goto out_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	}
	/*
	 * Move pages one at a time from the tail of page_list into the
	 * page cache and record them in pages[].
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 	for (i = 0; i < nr_pages; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 		page = list_entry(page_list->prev, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 		BUG_ON(PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 		list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399)  		dout("start_read %p adding %p idx %lu\n", inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 		     page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 		if (add_to_page_cache_lru(page, &inode->i_data, page->index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 					  GFP_KERNEL)) {
			/*
			 * Page i could not be inserted: drop it, then either
			 * shrink the request to the i pages already added or,
			 * if there are none, abandon the request.
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 			ceph_fscache_uncache_page(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 			dout("start_read %p add_to_page_cache failed %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 			     inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 			nr_pages = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 			if (nr_pages > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 				len = nr_pages << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 				osd_req_op_extent_update(req, 0, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 			goto out_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 		pages[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 	}
	/* finish_read() will run as the request's completion callback */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	req->r_callback = finish_read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 	req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 	ret = ceph_osdc_start_request(osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 		goto out_pages;
	/* drop this function's reference on the request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	/* After adding locked pages to page cache, the inode holds cache cap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 	 * So we can drop our cap refs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 	if (got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 		ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 	return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 
	/*
	 * Error unwinding.  out_pages: cancel the fscache read for, unlock
	 * and release the nr_pages pages recorded in pages[], and free the
	 * vector.  out_put: drop the request.  out: drop any caps taken at
	 * the top of the function.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) out_pages:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 	for (i = 0; i < nr_pages; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 		ceph_fscache_readpage_cancel(inode, pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 		unlock_page(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 	ceph_put_page_vector(pages, nr_pages, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) out_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	if (got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 		ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450)  * Read multiple pages.  Leave pages we don't read + unlock in page_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451)  * the caller (VM) cleans them up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) static int ceph_readpages(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 			  struct list_head *page_list, unsigned nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 	struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 	struct ceph_file_info *fi = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 	struct ceph_rw_context *rw_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 	int rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 	int max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 	rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 					 &nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 	if (rc == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 	rw_ctx = ceph_find_rw_context(fi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 	max = fsc->mount_options->rsize >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 	dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 	     inode, file, rw_ctx, nr_pages, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 	while (!list_empty(page_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 		rc = start_read(inode, rw_ctx, page_list, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 		if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	ceph_fscache_readpages_cancel(inode, page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 	dout("readpages %p file %p ret %d\n", inode, file, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) struct ceph_writeback_ctl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 	loff_t i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 	u64 truncate_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 	u32 truncate_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 	bool size_stable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 	bool head_snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498)  * Get ref for the oldest snapc for an inode with dirty data... that is, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499)  * only snap context we are allowed to write back.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) static struct ceph_snap_context *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 		   struct ceph_snap_context *page_snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 	struct ceph_snap_context *snapc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 	struct ceph_cap_snap *capsnap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 	list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 		dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 		     capsnap->context, capsnap->dirty_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 		if (!capsnap->dirty_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 		/* get i_size, truncate_{seq,size} for page_snapc? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 		if (snapc && capsnap->context != page_snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 		if (ctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 			if (capsnap->writing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 				ctl->i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 				ctl->size_stable = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 				ctl->i_size = capsnap->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 				ctl->size_stable = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 			ctl->truncate_size = capsnap->truncate_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 			ctl->truncate_seq = capsnap->truncate_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 			ctl->head_snapc = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 		if (snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 		snapc = ceph_get_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 		if (!page_snapc ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 		    page_snapc == snapc ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 		    page_snapc->seq > snapc->seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 	if (!snapc && ci->i_wrbuffer_ref_head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 		snapc = ceph_get_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 		dout(" head snapc %p has %d dirty pages\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 		     snapc, ci->i_wrbuffer_ref_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 		if (ctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 			ctl->i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 			ctl->truncate_size = ci->i_truncate_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 			ctl->truncate_seq = ci->i_truncate_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 			ctl->size_stable = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 			ctl->head_snapc = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 	return snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) static u64 get_writepages_data_length(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 				      struct page *page, u64 start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 	struct ceph_snap_context *snapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 	struct ceph_cap_snap *capsnap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 	u64 end = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 	if (snapc != ci->i_head_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 		bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 		spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 		list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 			if (capsnap->context == snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 				if (!capsnap->writing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 					end = capsnap->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 				found = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 		spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 		WARN_ON(!found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 	if (end > page_offset(page) + PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 		end = page_offset(page) + PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	return end > start ? end - start : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586)  * Write a single page, but leave the page locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588)  * If we get a write error, mark the mapping for error, but still adjust the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589)  * dirty page accounting (i.e., page is no longer dirty).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 	struct inode *inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 	struct ceph_snap_context *snapc, *oldest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 	loff_t page_off = page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 	loff_t len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 	struct ceph_writeback_ctl ceph_wbc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 	struct ceph_osd_client *osdc = &fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 	struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	dout("writepage %p idx %lu\n", page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	/* verify this is a writeable snap context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 	snapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 	if (!snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 		dout("writepage %p page %p not dirty?\n", inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	oldest = get_oldest_context(inode, &ceph_wbc, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 	if (snapc->seq > oldest->seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 		dout("writepage %p page %p snapc %p not writeable - noop\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 		     inode, page, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 		/* we should only noop if called by kswapd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 		WARN_ON(!(current->flags & PF_MEMALLOC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 		ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 		redirty_page_for_writepage(wbc, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 	ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 	/* is this a partial page at end of file? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	if (page_off >= ceph_wbc.i_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 		dout("%p page eof %llu\n", page, ceph_wbc.i_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 		page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 	if (ceph_wbc.i_size < page_off + len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 		len = ceph_wbc.i_size - page_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 	dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 	     inode, page, page->index, page_off, len, snapc, snapc->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 	if (atomic_long_inc_return(&fsc->writeback_count) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	    CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 		set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	set_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 	req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 				    CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 				    ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 				    true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 		redirty_page_for_writepage(wbc, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 		end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 		return PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	/* it may be a short write due to an object boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 	WARN_ON_ONCE(len > PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 	dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 	req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 	err = ceph_osdc_start_request(osdc, req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 		err = ceph_osdc_wait_request(osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 	ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 				  req->r_end_latency, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 	if (err == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 		err = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 	if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 		struct writeback_control tmp_wbc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 		if (!wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 			wbc = &tmp_wbc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 		if (err == -ERESTARTSYS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 			/* killed by SIGKILL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 			dout("writepage interrupted page %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 			redirty_page_for_writepage(wbc, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 			end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 			return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 		if (err == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 			fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 		dout("writepage setting page/mapping error %d %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 		     err, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 		mapping_set_error(&inode->i_data, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 		wbc->pages_skipped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 		dout("writepage cleaned page %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 		err = 0;  /* vfs expects us to return 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 	page->private = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 	ClearPagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 	end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 	ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 	ceph_put_snap_context(snapc);  /* page's reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 	if (atomic_long_dec_return(&fsc->writeback_count) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 	    CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 		clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) static int ceph_writepage(struct page *page, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 	struct inode *inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 	BUG_ON(!inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 	ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 	err = writepage_nounlock(page, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 	if (err == -ERESTARTSYS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 		/* direct memory reclaimer was killed by SIGKILL. return 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 		 * to prevent caller from setting mapping/page error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 		err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 	unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721)  * async writeback completion handler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723)  * If we get an error, set the mapping error bit, but not the individual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724)  * page error bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) static void writepages_finish(struct ceph_osd_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 	struct inode *inode = req->r_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 	struct ceph_osd_data *osd_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 	int num_pages, total_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 	int rc = req->r_result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	struct ceph_snap_context *snapc = req->r_snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 	struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 	bool remove_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	dout("writepages_finish %p rc %d\n", inode, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	if (rc < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 		mapping_set_error(mapping, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 		ceph_set_error_write(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 		if (rc == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 			fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 		ceph_clear_error_write(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 				  req->r_end_latency, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	 * We lost the cache cap, need to truncate the page before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	 * it is unlocked, otherwise we'd truncate it later in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	 * page truncation thread, possibly losing some data that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	 * raced its way in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	remove_page = !(ceph_caps_issued(ci) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 			(CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	/* clean all pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	for (i = 0; i < req->r_num_ops; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 		if (req->r_ops[i].op != CEPH_OSD_OP_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 		osd_data = osd_req_op_extent_osd_data(req, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 		BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 		num_pages = calc_pages_for((u64)osd_data->alignment,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 					   (u64)osd_data->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		total_pages += num_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 		for (j = 0; j < num_pages; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 			page = osd_data->pages[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 			BUG_ON(!page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 			WARN_ON(!PageUptodate(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 			if (atomic_long_dec_return(&fsc->writeback_count) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 			     CONGESTION_OFF_THRESH(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 					fsc->mount_options->congestion_kb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 				clear_bdi_congested(inode_to_bdi(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 						    BLK_RW_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 			ceph_put_snap_context(page_snap_context(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 			page->private = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 			ClearPagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 			dout("unlocking %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 			end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 			if (remove_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 				generic_error_remove_page(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 							  page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 		dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 		     inode, osd_data->length, rc >= 0 ? num_pages : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 		release_pages(osd_data->pages, num_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	osd_data = osd_req_op_extent_osd_data(req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	if (osd_data->pages_from_pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 		mempool_free(osd_data->pages, ceph_wb_pagevec_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		kfree(osd_data->pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 	ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812)  * initiate async writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) static int ceph_writepages_start(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 				 struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	struct ceph_vino vino = ceph_vino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	pgoff_t index, start_index, end = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 	struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	struct pagevec pvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	int rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	unsigned int wsize = i_blocksize(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	struct ceph_osd_request *req = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	struct ceph_writeback_ctl ceph_wbc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	bool should_loop, range_whole = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	bool done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	dout("writepages_start %p (mode=%s)\n", inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 	if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		if (ci->i_wrbuffer_ref > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 			pr_warn_ratelimited(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 				"writepage_start %p %lld forced umount\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 				inode, ceph_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 		mapping_set_error(mapping, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		return -EIO; /* we're in a forced umount, don't write! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 	if (fsc->mount_options->wsize < wsize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 		wsize = fsc->mount_options->wsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	pagevec_init(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	index = start_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	/* find oldest snap context with dirty data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	snapc = get_oldest_context(inode, &ceph_wbc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	if (!snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 		/* hmm, why does writepages get called when there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 		   is no dirty data? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		dout(" no snap context with dirty data?\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 	dout(" oldest snapc is %p seq %lld (%d snaps)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	     snapc, snapc->seq, snapc->num_snaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	should_loop = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	if (ceph_wbc.head_snapc && snapc != last_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 		/* where to start/end? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 		if (wbc->range_cyclic) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 			index = start_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 			end = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 			if (index > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 				should_loop = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 			dout(" cyclic, start at %lu\n", index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 			index = wbc->range_start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 			end = wbc->range_end >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 			if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 				range_whole = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 			dout(" not cyclic, %lu to %lu\n", index, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	} else if (!ceph_wbc.head_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 		/* Do not respect wbc->range_{start,end}. Dirty pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 		 * in that range can be associated with newer snapc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 		 * They are not writeable until all dirty pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 		 * associated with 'snapc' get written */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		if (index > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 			should_loop = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 		dout(" non-head snapc, range whole\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	ceph_put_snap_context(last_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	last_snapc = snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	while (!done && index <= end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		int num_ops = 0, op_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 		unsigned i, pvec_pages, max_pages, locked_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 		struct page **pages = NULL, **data_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 		pgoff_t strip_unit_end = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 		u64 offset = 0, len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		bool from_pool = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 		max_pages = wsize >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) get_more_pages:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 						end, PAGECACHE_TAG_DIRTY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 		dout("pagevec_lookup_range_tag got %d\n", pvec_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 		if (!pvec_pages && !locked_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 		for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 			page = pvec.pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 			dout("? %p idx %lu\n", page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 			if (locked_pages == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 				lock_page(page);  /* first page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 			else if (!trylock_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 			/* only dirty pages, or our accounting breaks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 			if (unlikely(!PageDirty(page)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 			    unlikely(page->mapping != mapping)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 				dout("!dirty or !mapping %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 				unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 			/* only if matching snap context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 			pgsnapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 			if (pgsnapc != snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 				dout("page snapc %p %lld != oldest %p %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 				     pgsnapc, pgsnapc->seq, snapc, snapc->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 				if (!should_loop &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 				    !ceph_wbc.head_snapc &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 				    wbc->sync_mode != WB_SYNC_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 					should_loop = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 				unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 			if (page_offset(page) >= ceph_wbc.i_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 				dout("%p page eof %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 				     page, ceph_wbc.i_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 				if ((ceph_wbc.size_stable ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 				    page_offset(page) >= i_size_read(inode)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 				    clear_page_dirty_for_io(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 					mapping->a_ops->invalidatepage(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 								0, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 				unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 			if (strip_unit_end && (page->index > strip_unit_end)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 				dout("end of strip unit %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 				unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 			if (PageWriteback(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 				if (wbc->sync_mode == WB_SYNC_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 					dout("%p under writeback\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 					unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 				dout("waiting on writeback %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 				wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 			if (!clear_page_dirty_for_io(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 				dout("%p !clear_page_dirty_for_io\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 				unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 			 * We have something to write.  If this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 			 * the first locked page this time through,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 			 * calculate max possible write size and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 			 * allocate a page array
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 			if (locked_pages == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 				u64 objnum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 				u64 objoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 				u32 xlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 				/* prepare async write request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 				offset = (u64)page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 				ceph_calc_file_object_mapping(&ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 							      offset, wsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 							      &objnum, &objoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 							      &xlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 				len = xlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 				num_ops = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 				strip_unit_end = page->index +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 					((len - 1) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 				BUG_ON(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 				max_pages = calc_pages_for(0, (u64)len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 				pages = kmalloc_array(max_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 						      sizeof(*pages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 						      GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 				if (!pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 					from_pool = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 					pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 					BUG_ON(!pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 				len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 			} else if (page->index !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 				   (offset + len) >> PAGE_SHIFT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 				if (num_ops >= (from_pool ?  CEPH_OSD_SLAB_OPS :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 							     CEPH_OSD_MAX_OPS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 					redirty_page_for_writepage(wbc, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 					unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 				num_ops++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 				offset = (u64)page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 				len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 			/* note position of first page in pvec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 			dout("%p will write page %p idx %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 			     inode, page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 			if (atomic_long_inc_return(&fsc->writeback_count) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 			    CONGESTION_ON_THRESH(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 				    fsc->mount_options->congestion_kb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 				set_bdi_congested(inode_to_bdi(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 						  BLK_RW_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 			pages[locked_pages++] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 			pvec.pages[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 			len += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		/* did we get anything? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 		if (!locked_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 			goto release_pvec_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 		if (i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 			unsigned j, n = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 			/* shift unused page to beginning of pvec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 			for (j = 0; j < pvec_pages; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 				if (!pvec.pages[j])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 				if (n < j)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 					pvec.pages[n] = pvec.pages[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 				n++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 			pvec.nr = n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 			if (pvec_pages && i == pvec_pages &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 			    locked_pages < max_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 				dout("reached end pvec, trying for more\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 				pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 				goto get_more_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) new_request:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 		offset = page_offset(pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 		len = wsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 		req = ceph_osdc_new_request(&fsc->client->osdc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 					&ci->i_layout, vino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 					offset, &len, 0, num_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 					CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 					snapc, ceph_wbc.truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 					ceph_wbc.truncate_size, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 		if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 			req = ceph_osdc_new_request(&fsc->client->osdc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 						&ci->i_layout, vino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 						offset, &len, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 						min(num_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 						    CEPH_OSD_SLAB_OPS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 						CEPH_OSD_OP_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 						CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 						snapc, ceph_wbc.truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 						ceph_wbc.truncate_size, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 			BUG_ON(IS_ERR(req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 		BUG_ON(len < page_offset(pages[locked_pages - 1]) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 			     PAGE_SIZE - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 		req->r_callback = writepages_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 		req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 		/* Format the osd request message and submit the write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 		len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 		data_pages = pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 		op_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 		for (i = 0; i < locked_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 			u64 cur_offset = page_offset(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 			if (offset + len != cur_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 				if (op_idx + 1 == req->r_num_ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 				osd_req_op_extent_dup_last(req, op_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 							   cur_offset - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 				dout("writepages got pages at %llu~%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 				     offset, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 				osd_req_op_extent_osd_data_pages(req, op_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 							data_pages, len, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 							from_pool, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 				osd_req_op_extent_update(req, op_idx, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 				len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 				offset = cur_offset; 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 				data_pages = pages + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 				op_idx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 			set_page_writeback(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 			len += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 		if (ceph_wbc.size_stable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 			len = min(len, ceph_wbc.i_size - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 		} else if (i == locked_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 			/* writepages_finish() clears writeback pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 			 * according to the data length, so make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 			 * data length covers all locked pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 			u64 min_len = len + 1 - PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 			len = get_writepages_data_length(inode, pages[i - 1],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 							 offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 			len = max(len, min_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 		dout("writepages got pages at %llu~%llu\n", offset, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 		osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 						 0, from_pool, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 		osd_req_op_extent_update(req, op_idx, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 		BUG_ON(op_idx + 1 != req->r_num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 		from_pool = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 		if (i < locked_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 			BUG_ON(num_ops <= req->r_num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 			num_ops -= req->r_num_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 			locked_pages -= i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 			/* allocate new pages array for next request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 			data_pages = pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 			pages = kmalloc_array(locked_pages, sizeof(*pages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 					      GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 			if (!pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 				from_pool = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 				pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 				BUG_ON(!pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 			memcpy(pages, data_pages + i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 			       locked_pages * sizeof(*pages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 			memset(data_pages + i, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 			       locked_pages * sizeof(*pages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 			BUG_ON(num_ops != req->r_num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 			index = pages[i - 1]->index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 			/* request message now owns the pages array */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 			pages = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 		req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 		rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 		BUG_ON(rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 		req = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 		wbc->nr_to_write -= i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 		if (pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 			goto new_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 		 * We stop writing back only if we are not doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 		 * integrity sync. In case of integrity sync we have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 		 * keep going until we have written all the pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 		 * we tagged for writeback prior to entering this loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 		if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 			done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) release_pvec_pages:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 		dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 		     pvec.nr ? pvec.pages[0] : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 		pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	if (should_loop && !done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 		/* more to do; loop back to beginning of file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		dout("writepages looping back to beginning of file\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 		end = start_index - 1; /* OK even when start_index == 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 		/* to write dirty pages associated with next snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 		 * we need to wait until current writes complete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		if (wbc->sync_mode != WB_SYNC_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 		    start_index == 0 && /* all dirty pages were checked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 		    !ceph_wbc.head_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 			struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 			unsigned i, nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 			index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 			while ((index <= end) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 			       (nr = pagevec_lookup_tag(&pvec, mapping, &index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 						PAGECACHE_TAG_WRITEBACK))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 				for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 					page = pvec.pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 					if (page_snap_context(page) != snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 						continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 					wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 				pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 				cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 		start_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 		index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 		goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		mapping->writeback_index = index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	ceph_put_snap_context(last_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	dout("writepages dend - startone, rc = %d\n", rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)  * See if a given @snapc is either writeable, or already written.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) static int context_is_writeable_or_written(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 					   struct ceph_snap_context *snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	struct ceph_snap_context *oldest = get_oldest_context(inode, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	int ret = !oldest || snapc->seq <= oldest->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242)  * ceph_find_incompatible - find an incompatible context and return it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243)  * @page: page being dirtied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245)  * We are only allowed to write into/dirty a page if the page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)  * clean, or already dirty within the same snap context. Returns a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)  * conflicting context if there is one, NULL if there isn't, or a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)  * negative error code on other errors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)  * Must be called with page lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) static struct ceph_snap_context *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) ceph_find_incompatible(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	struct inode *inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 		dout(" page %p forced umount\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 		return ERR_PTR(-EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 		struct ceph_snap_context *snapc, *oldest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 		wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		snapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		if (!snapc || snapc == ci->i_head_snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 		 * this page is already dirty in another (older) snap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 		 * context!  is it writeable now?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 		oldest = get_oldest_context(inode, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 		if (snapc->seq > oldest->seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 			/* not writeable -- return it for the caller to deal with */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 			ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 			dout(" page %p snapc %p not current or oldest\n", page, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 			return ceph_get_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 		ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 		/* yay, writeable, do it now (without dropping page lock) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 		dout(" page %p snapc %p not current, but oldest\n", page, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 		if (clear_page_dirty_for_io(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 			int r = writepage_nounlock(page, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 			if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 				return ERR_PTR(r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298)  * prep_noread_page - prep a page for writing without reading first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)  * @page: page being prepared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300)  * @pos: starting position for the write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)  * @len: length of write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)  * In some cases, write_begin doesn't need to read at all:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)  * - full page write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305)  * - file is currently zero-length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)  * - write that lies in a page that is completely beyond EOF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307)  * - write that covers the the page from start to EOF or beyond it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309)  * If any of these criteria are met, then zero out the unwritten parts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)  * of the page and return true. Otherwise, return false.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) static bool skip_page_read(struct page *page, loff_t pos, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	struct inode *inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	loff_t i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	size_t offset = offset_in_page(pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	/* Full page write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	if (offset == 0 && len >= PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	/* pos beyond last page in the file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	if (pos - offset >= i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 		goto zero_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	/* write that covers the whole page from start to EOF or beyond it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	if (offset == 0 && (pos + len) >= i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 		goto zero_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) zero_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337)  * We are only allowed to write into/dirty the page if the page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338)  * clean, or already dirty within the same snap context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) static int ceph_write_begin(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 			    loff_t pos, unsigned len, unsigned flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 			    struct page **pagep, void **fsdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	pgoff_t index = pos >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 	int r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 		page = grab_cache_page_write_begin(mapping, index, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 		if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 			r = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 		snapc = ceph_find_incompatible(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 		if (snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 			if (IS_ERR(snapc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 				r = PTR_ERR(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 			page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 			ceph_queue_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 			r = wait_event_killable(ci->i_cap_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 						context_is_writeable_or_written(inode, snapc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 			ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 			if (r != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 		if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 			dout(" page %p already uptodate\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 		/* No need to read in some cases */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 		if (skip_page_read(page, pos, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 		 * We need to read it. If we get back -EINPROGRESS, then the page was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 		 * handed off to fscache and it will be unlocked when the read completes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 		 * Refind the page in that case so we can reacquire the page lock. Otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 		 * we got a hard error or the read was completed synchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 		r = ceph_do_readpage(file, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 		if (r != -EINPROGRESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 	if (r < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 		if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 		*pagep = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)  * we don't do anything in here that simple_write_end doesn't do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411)  * except adjust dirty page accounting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) static int ceph_write_end(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 			  loff_t pos, unsigned len, unsigned copied,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 			  struct page *page, void *fsdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 	bool check_cap = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 	dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 	     inode, page, (int)pos, (int)copied, (int)len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 	/* zero the stale part of the page if we did a short copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 	if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 		if (copied < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 			copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 		SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	/* did file size increase? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 	if (pos+copied > i_size_read(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 		check_cap = ceph_inode_set_size(inode, pos+copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 	unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 	put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 	if (check_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 		ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449)  * we set .direct_IO to indicate direct io is supported, but since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)  * intercept O_DIRECT reads and writes early, this function should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451)  * never get called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 	WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) const struct address_space_operations ceph_aops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 	.readpage = ceph_readpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	.readpages = ceph_readpages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	.writepage = ceph_writepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	.writepages = ceph_writepages_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 	.write_begin = ceph_write_begin,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 	.write_end = ceph_write_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 	.set_page_dirty = ceph_set_page_dirty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 	.invalidatepage = ceph_invalidatepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	.releasepage = ceph_releasepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	.direct_IO = ceph_direct_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) static void ceph_block_sigs(sigset_t *oldset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	sigset_t mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 	siginitsetinv(&mask, sigmask(SIGKILL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 	sigprocmask(SIG_BLOCK, &mask, oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) static void ceph_restore_sigs(sigset_t *oldset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 	sigprocmask(SIG_SETMASK, oldset, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485)  * vm ops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 	struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 	struct ceph_file_info *fi = vma->vm_file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	struct page *pinned_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	int want, got, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	sigset_t oldset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	vm_fault_t ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	ceph_block_sigs(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	     inode, ceph_vinop(inode), off, (size_t)PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 		want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 		want = CEPH_CAP_FILE_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 			    &got, &pinned_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 		goto out_restore;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	     inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	    ci->i_inline_version == CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 		CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 		ceph_add_rw_context(fi, &rw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 		ret = filemap_fault(vmf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 		ceph_del_rw_context(fi, &rw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 		dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 			inode, off, (size_t)PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 				ceph_cap_string(got), ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 	} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 		err = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	if (pinned_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 		put_page(pinned_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 	ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 	if (err != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 		goto out_restore;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 	/* read inline data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 	if (off >= PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 		/* does not support inline data > PAGE_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 		ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 		struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 		struct page *page = find_or_create_page(mapping, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 						mapping_gfp_constraint(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 						~__GFP_FS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 		if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 			ret = VM_FAULT_OOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 			goto out_inline;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 		err = __ceph_do_getattr(inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 					 CEPH_STAT_CAP_INLINE_DATA, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 		if (err < 0 || off >= i_size_read(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 			ret = vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 			goto out_inline;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 		if (err < PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 			zero_user_segment(page, err, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 			flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 		SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		vmf->page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 		ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) out_inline:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 		dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 		     inode, off, (size_t)PAGE_SIZE, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) out_restore:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 	ceph_restore_sigs(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 		ret = vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)  * Reuse write_begin here for simplicity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 	struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) 	struct ceph_file_info *fi = vma->vm_file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	struct ceph_cap_flush *prealloc_cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	struct page *page = vmf->page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 	loff_t off = page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 	loff_t size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 	size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 	int want, got, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 	sigset_t oldset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 	vm_fault_t ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 	prealloc_cf = ceph_alloc_cap_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 	if (!prealloc_cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 		return VM_FAULT_OOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 	sb_start_pagefault(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 	ceph_block_sigs(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	if (ci->i_inline_version != CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 		struct page *locked_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 		if (off == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 			lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 			locked_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 		err = ceph_uninline_data(vma->vm_file, locked_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 		if (locked_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 			unlock_page(locked_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 		if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 			goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	if (off + PAGE_SIZE <= size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 		len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 		len = size & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 	dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	     inode, ceph_vinop(inode), off, len, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	if (fi->fmode & CEPH_FILE_MODE_LAZY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 		want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 		want = CEPH_CAP_FILE_BUFFER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 	got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 	err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 			    &got, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 		goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 	dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	     inode, off, len, ceph_cap_string(got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 	/* Update time before taking page lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 	file_update_time(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 	inode_inc_iversion_raw(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 	do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 		struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 		lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 		if (page_mkwrite_check_truncate(page, inode) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 			ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 		snapc = ceph_find_incompatible(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 		if (!snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 			/* success.  we'll keep the page locked. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 			set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 			ret = VM_FAULT_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 		if (IS_ERR(snapc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 			ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 		ceph_queue_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 		err = wait_event_killable(ci->i_cap_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 				context_is_writeable_or_written(inode, snapc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 		ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 	} while (err == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 	if (ret == VM_FAULT_LOCKED ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 	    ci->i_inline_version != CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 		int dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 		spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 		ci->i_inline_version = CEPH_INLINE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 					       &prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 		spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 		if (dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 			__mark_inode_dirty(inode, dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 	dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 	     inode, off, len, ceph_cap_string(got), ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 	ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 	ceph_restore_sigs(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 	sb_end_pagefault(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 	ceph_free_cap_flush(prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 		ret = vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 			   char	*data, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 	if (locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 		page = locked_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 		if (i_size_read(inode) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 		page = find_or_create_page(mapping, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 					   mapping_gfp_constraint(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 					   ~__GFP_FS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 		if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 		if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	     inode, ceph_vinop(inode), len, locked_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 		void *kaddr = kmap_atomic(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 		memcpy(kaddr, data, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 		kunmap_atomic(kaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	if (page != locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 		if (len < PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 			zero_user_segment(page, len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 			flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 		SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 		unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 		put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) int ceph_uninline_data(struct file *filp, struct page *locked_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 	struct inode *inode = file_inode(filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 	struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 	struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 	u64 len, inline_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 	int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 	bool from_pagecache = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	inline_version = ci->i_inline_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	dout("uninline_data %p %llx.%llx inline_version %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 	     inode, ceph_vinop(inode), inline_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 	if (inline_version == 1 || /* initial version, no data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	    inline_version == CEPH_INLINE_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 	if (locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 		page = locked_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 		WARN_ON(!PageUptodate(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 	} else if (ceph_caps_issued(ci) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 		   (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 		page = find_get_page(inode->i_mapping, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 		if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 			if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 				from_pagecache = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 				lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 				put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 				page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 	if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 		len = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 		if (len > PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 			len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 		page = __page_cache_alloc(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 		if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 			err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 		err = __ceph_do_getattr(inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 					CEPH_STAT_CAP_INLINE_DATA, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 		if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 			/* no inline data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 			if (err == -ENODATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 				err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 		len = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 				    ceph_vino(inode), 0, &len, 0, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 				    CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 				    NULL, 0, 0, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 	if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 		err = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 	req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 	ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 				    ceph_vino(inode), 0, &len, 1, 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 				    CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 				    NULL, ci->i_truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 				    ci->i_truncate_size, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 	if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 		err = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 		__le64 xattr_buf = cpu_to_le64(inline_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 		err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 					    "inline_version", &xattr_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 					    sizeof(xattr_buf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 					    CEPH_OSD_CMPXATTR_OP_GT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 					    CEPH_OSD_CMPXATTR_MODE_U64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 			goto out_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 		char xattr_buf[32];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 		int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 					 "%llu", inline_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 		err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 					    "inline_version",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 					    xattr_buf, xattr_len, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 			goto out_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 	req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 	err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 		err = ceph_osdc_wait_request(&fsc->client->osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 	ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 				  req->r_end_latency, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) out_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 	ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	if (err == -ECANCELED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 		err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 	if (page && page != locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 		if (from_pagecache) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 			unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 			put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 		} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 			__free_pages(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 	dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 	     inode, ceph_vinop(inode), inline_version, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) static const struct vm_operations_struct ceph_vmops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 	.fault		= ceph_filemap_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 	.page_mkwrite	= ceph_page_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) int ceph_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 	if (!mapping->a_ops->readpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 		return -ENOEXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 	file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 	vma->vm_ops = &ceph_vmops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 
/* Permission bits stored in a pool-perm cache entry; may be OR-ed together. */
enum {
	POOL_READ = 1 << 0,
	POOL_WRITE = 1 << 1,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 				s64 pool, struct ceph_string *pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 	struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 	struct ceph_mds_client *mdsc = fsc->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 	struct ceph_osd_request *rd_req = NULL, *wr_req = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 	struct rb_node **p, *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 	struct ceph_pool_perm *perm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 	struct page **pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 	size_t pool_ns_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	int err = 0, err2 = 0, have = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 	down_read(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 	p = &mdsc->pool_perm_tree.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 	while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 		perm = rb_entry(*p, struct ceph_pool_perm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 		if (pool < perm->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 			p = &(*p)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 		else if (pool > perm->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 			p = &(*p)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 		else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 			int ret = ceph_compare_string(pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 						perm->pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 						perm->pool_ns_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 			if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 				p = &(*p)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 			else if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 				p = &(*p)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 			else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 				have = perm->perm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 	up_read(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 	if (*p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 	if (pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 		dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 		     pool, (int)pool_ns->len, pool_ns->str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 		dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	down_write(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 	p = &mdsc->pool_perm_tree.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 	parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 	while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) 		parent = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 		perm = rb_entry(parent, struct ceph_pool_perm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 		if (pool < perm->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 			p = &(*p)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 		else if (pool > perm->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 			p = &(*p)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 		else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 			int ret = ceph_compare_string(pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 						perm->pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 						perm->pool_ns_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 			if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 				p = &(*p)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 			else if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 				p = &(*p)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 			else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 				have = perm->perm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 	if (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 		up_write(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 	rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 					 1, false, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 	if (!rd_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 		err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 	rd_req->r_flags = CEPH_OSD_FLAG_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 	osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 	rd_req->r_base_oloc.pool = pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 	if (pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 		rd_req->r_base_oloc.pool_ns = ceph_get_string(pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 	ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 	err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 					 1, false, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	if (!wr_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 		err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	wr_req->r_flags = CEPH_OSD_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 	osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 	ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 	ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 	err = ceph_osdc_alloc_messages(wr_req, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 	/* one page should be large enough for STAT data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	pages = ceph_alloc_page_vector(1, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 	if (IS_ERR(pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 		err = PTR_ERR(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 	osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 				     0, false, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 	err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 	wr_req->r_mtime = ci->vfs_inode.i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 	err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 		err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 	if (!err2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 		err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 	if (err >= 0 || err == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 		have |= POOL_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 	else if (err != -EPERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 		if (err == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 			fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	if (err2 == 0 || err2 == -EEXIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 		have |= POOL_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	else if (err2 != -EPERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 		if (err2 == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 			fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 		err = err2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 	pool_ns_len = pool_ns ? pool_ns->len : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 	perm = kmalloc(sizeof(*perm) + pool_ns_len + 1, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 	if (!perm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 		err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 		goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 	perm->pool = pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 	perm->perm = have;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 	perm->pool_ns_len = pool_ns_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 	if (pool_ns_len > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 		memcpy(perm->pool_ns, pool_ns->str, pool_ns_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	perm->pool_ns[pool_ns_len] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 	rb_link_node(&perm->node, parent, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 	rb_insert_color(&perm->node, &mdsc->pool_perm_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 	err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 	up_write(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 	ceph_osdc_put_request(rd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	ceph_osdc_put_request(wr_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 		err = have;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 	if (pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 		dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 		     pool, (int)pool_ns->len, pool_ns->str, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 		dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) int ceph_pool_perm_check(struct inode *inode, int need)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	struct ceph_string *pool_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 	s64 pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 	int ret, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 	if (ci->i_vino.snap != CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 		 * Pool permission check needs to write to the first object.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 		 * But for snapshot, head of the first object may have alread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 		 * been deleted. Skip check to avoid creating orphan object.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 	if (ceph_test_mount_opt(ceph_inode_to_client(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 				NOPOOLPERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 	flags = ci->i_ceph_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 	pool = ci->i_layout.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) check:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 	if (flags & CEPH_I_POOL_PERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 		if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 			dout("ceph_pool_perm_check pool %lld no read perm\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 			     pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 			return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 		if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 			dout("ceph_pool_perm_check pool %lld no write perm\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 			     pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 			return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 	pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 	ret = __ceph_pool_perm_get(ci, pool, pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 	ceph_put_string(pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 	if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 	flags = CEPH_I_POOL_PERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 	if (ret & POOL_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 		flags |= CEPH_I_POOL_RD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 	if (ret & POOL_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 		flags |= CEPH_I_POOL_WR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 	if (pool == ci->i_layout.pool_id &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 	    pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 		ci->i_ceph_flags |= flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128)         } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 		pool = ci->i_layout.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 		flags = ci->i_ceph_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 	goto check;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) void ceph_pool_perm_destroy(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 	struct ceph_pool_perm *perm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 	struct rb_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 	while (!RB_EMPTY_ROOT(&mdsc->pool_perm_tree)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 		n = rb_first(&mdsc->pool_perm_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 		perm = rb_entry(n, struct ceph_pool_perm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 		rb_erase(n, &mdsc->pool_perm_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 		kfree(perm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) }