^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #include <linux/ceph/ceph_debug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) #include <linux/backing-dev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/pagemap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/writeback.h> /* generic_writepages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/pagevec.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/task_io_accounting_ops.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/signal.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/iversion.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/ktime.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include "super.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "mds_client.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "cache.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include "metric.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/ceph/osd_client.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/ceph/striper.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * Ceph address space ops.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) * There are a few funny things going on here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * The page->private field is used to reference a struct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * ceph_snap_context for _every_ dirty page. This indicates which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * snapshot the page was logically dirtied in, and thus which snap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * context needs to be associated with the osd write during writeback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * Similarly, struct ceph_inode_info maintains a set of counters to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * count dirty pages on the inode. In the absence of snapshots,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * i_wrbuffer_ref == i_wrbuffer_ref_head == the dirty page count.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * When a snapshot is taken (that is, when the client receives
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * notification that a snapshot was taken), each inode with caps and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * with dirty pages (dirty pages implies there is a cap) gets a new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * ceph_cap_snap in the i_cap_snaps list (which is sorted in ascending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * order, new snaps go to the tail). The i_wrbuffer_ref_head count is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * moved to capsnap->dirty_pages. (Unless a sync write is currently in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * progress. In that case, the capsnap is said to be "pending", new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * writes cannot start, and the capsnap isn't "finalized" until the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * write completes (or fails) and a final size/mtime for the inode for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * that snap can be settled upon.) i_wrbuffer_ref_head is reset to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * On writeback, we must submit writes to the osd IN SNAP ORDER. So,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * we look for the first capsnap in i_cap_snaps and write out pages in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) * that snap context _only_. Then we move on to the next capsnap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) * eventually reaching the "live" or "head" context (i.e., pages that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) * are not yet snapped) and are writing the most recently dirtied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * Invalidate and so forth must take care to ensure the dirty page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * accounting is preserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
/*
 * Congestion thresholds derived from a size given in KiB.
 *
 * Shifting right by (PAGE_SHIFT - 10) converts KiB into a page count.
 * The "off" threshold is the "on" threshold minus a quarter of it.
 *
 * The argument is parenthesized so that callers may pass an arbitrary
 * expression (e.g. "a | b") without it re-associating with the shift.
 */
#define CONGESTION_ON_THRESH(congestion_kb) \
	((congestion_kb) >> (PAGE_SHIFT - 10))
#define CONGESTION_OFF_THRESH(congestion_kb) \
	(CONGESTION_ON_THRESH(congestion_kb) - \
	 (CONGESTION_ON_THRESH(congestion_kb) >> 2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) static inline struct ceph_snap_context *page_snap_context(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) if (PagePrivate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) return (void *)page->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * Dirty a page. Optimistically adjust accounting, on the assumption
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * that we won't race with invalidate. If we do, readjust.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) static int ceph_set_page_dirty(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) struct address_space *mapping = page->mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) if (PageDirty(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) dout("%p set_page_dirty %p idx %lu -- already dirty\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) mapping->host, page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) BUG_ON(!PagePrivate(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) /* dirty the head */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) BUG_ON(ci->i_wr_ref == 0); // caller should hold Fw reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) if (__ceph_have_pending_cap_snap(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) struct ceph_cap_snap *capsnap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) list_last_entry(&ci->i_cap_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) struct ceph_cap_snap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) snapc = ceph_get_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) capsnap->dirty_pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) BUG_ON(!ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) snapc = ceph_get_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) ++ci->i_wrbuffer_ref_head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) if (ci->i_wrbuffer_ref == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) ++ci->i_wrbuffer_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) dout("%p set_page_dirty %p idx %lu head %d/%d -> %d/%d "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) "snapc %p seq %lld (%d snaps)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) mapping->host, page, page->index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) ci->i_wrbuffer_ref-1, ci->i_wrbuffer_ref_head-1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) snapc, snapc->seq, snapc->num_snaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) * Reference snap context in page->private. Also set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * PagePrivate so that we get invalidatepage callback.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) BUG_ON(PagePrivate(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) page->private = (unsigned long)snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) SetPagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) return __set_page_dirty_nobuffers(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) * If we are truncating the full page (i.e. offset == 0), adjust the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) * dirty page counters appropriately. Only called if there is private
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) * data on the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) static void ceph_invalidatepage(struct page *page, unsigned int offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) unsigned int length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) struct ceph_snap_context *snapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) if (offset != 0 || length != PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) inode, page, page->index, offset, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) ceph_invalidate_fscache_page(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) WARN_ON(!PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) if (!PagePrivate(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) dout("%p invalidatepage %p idx %lu full dirty page\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) inode, page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) page->private = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) ClearPagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) static int ceph_releasepage(struct page *page, gfp_t g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) dout("%p releasepage %p idx %lu (%sdirty)\n", page->mapping->host,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) page, page->index, PageDirty(page) ? "" : "not ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) /* Can we release the page from the cache? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) if (!ceph_release_fscache_page(page, g))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) return !PagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) /* read a single page, without unlocking it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) static int ceph_do_readpage(struct file *filp, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) struct inode *inode = file_inode(filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) struct ceph_osd_client *osdc = &fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) struct ceph_vino vino = ceph_vino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) u64 off = page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) u64 len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) if (off >= i_size_read(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) zero_user_segment(page, 0, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) if (ci->i_inline_version != CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) * Uptodate inline data should have been added
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * into page cache while getting Fcr caps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) if (off == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) zero_user_segment(page, 0, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) err = ceph_readpage_from_fscache(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) if (err == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) return -EINPROGRESS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) vino.ino, vino.snap, filp, off, len, page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 0, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) ci->i_truncate_seq, ci->i_truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) if (IS_ERR(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) return PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) err = ceph_osdc_start_request(osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) err = ceph_osdc_wait_request(osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) req->r_end_latency, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) dout("readpage result %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) if (err == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) ceph_fscache_readpage_cancel(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) if (err == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) if (err < PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) /* zero fill remainder of page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) zero_user_segment(page, err, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) ceph_readpage_to_fscache(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) return err < 0 ? err : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) static int ceph_readpage(struct file *filp, struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) int r = ceph_do_readpage(filp, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) if (r != -EINPROGRESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) * Finish an async read(ahead) op.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) static void finish_read(struct ceph_osd_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) struct inode *inode = req->r_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) struct ceph_osd_data *osd_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) int rc = req->r_result <= 0 ? req->r_result : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) int bytes = req->r_result >= 0 ? req->r_result : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) int num_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) if (rc == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) ceph_inode_to_client(inode)->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) /* unlock all pages, zeroing any data we didn't read */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) osd_data = osd_req_op_extent_osd_data(req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) num_pages = calc_pages_for((u64)osd_data->alignment,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) (u64)osd_data->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) for (i = 0; i < num_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) struct page *page = osd_data->pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) if (rc < 0 && rc != -ENOENT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) ceph_fscache_readpage_cancel(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) if (bytes < (int)PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) /* zero (remainder of) page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) int s = bytes < 0 ? 0 : bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) zero_user_segment(page, s, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) dout("finish_read %p uptodate %p idx %lu\n", inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) ceph_readpage_to_fscache(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) bytes -= PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) req->r_end_latency, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) kfree(osd_data->pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) * start an async read(ahead) operation. return nr_pages we submitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) * a read for on success, or negative error code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) struct list_head *page_list, int max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) struct ceph_osd_client *osdc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) &ceph_inode_to_client(inode)->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) struct page *page = lru_to_page(page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) struct ceph_vino vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) u64 off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) u64 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) struct page **pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) pgoff_t next_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) int nr_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) int got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) if (!rw_ctx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) /* caller of readpages does not hold buffer and read caps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) * (fadvise, madvise and readahead cases) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) int want = CEPH_CAP_FILE_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) true, &got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) dout("start_read %p, error getting cap\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) } else if (!(got & want)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) dout("start_read %p, no cache cap\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) if (ret <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) if (got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) while (!list_empty(page_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) page = lru_to_page(page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) off = (u64) page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) /* count pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) next_index = page->index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) list_for_each_entry_reverse(page, page_list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) if (page->index != next_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) nr_pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) next_index++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) if (max && nr_pages == max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) len = nr_pages << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) off, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) vino = ceph_vino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 0, 1, CEPH_OSD_OP_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) CEPH_OSD_FLAG_READ, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) ci->i_truncate_seq, ci->i_truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) ret = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) /* build page vector */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) nr_pages = calc_pages_for(0, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) if (!pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) goto out_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) for (i = 0; i < nr_pages; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) page = list_entry(page_list->prev, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) BUG_ON(PageLocked(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) list_del(&page->lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) dout("start_read %p adding %p idx %lu\n", inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) if (add_to_page_cache_lru(page, &inode->i_data, page->index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) GFP_KERNEL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) ceph_fscache_uncache_page(inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) dout("start_read %p add_to_page_cache failed %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) nr_pages = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) if (nr_pages > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) len = nr_pages << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) osd_req_op_extent_update(req, 0, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) goto out_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) pages[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) req->r_callback = finish_read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) ret = ceph_osdc_start_request(osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) goto out_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) /* After adding locked pages to page cache, the inode holds cache cap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) * So we can drop our cap refs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) if (got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) out_pages:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) for (i = 0; i < nr_pages; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) ceph_fscache_readpage_cancel(inode, pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) unlock_page(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) ceph_put_page_vector(pages, nr_pages, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) out_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) if (got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) * Read multiple pages. Leave pages we don't read + unlock in page_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) * the caller (VM) cleans them up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) static int ceph_readpages(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) struct list_head *page_list, unsigned nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) struct ceph_file_info *fi = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) struct ceph_rw_context *rw_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) int rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) int max = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) &nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) if (rc == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) rw_ctx = ceph_find_rw_context(fi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) max = fsc->mount_options->rsize >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) inode, file, rw_ctx, nr_pages, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) while (!list_empty(page_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) rc = start_read(inode, rw_ctx, page_list, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) ceph_fscache_readpages_cancel(inode, page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) dout("readpages %p file %p ret %d\n", inode, file, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) struct ceph_writeback_ctl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) loff_t i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) u64 truncate_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) u32 truncate_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) bool size_stable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) bool head_snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) * Get ref for the oldest snapc for an inode with dirty data... that is, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) * only snap context we are allowed to write back.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) static struct ceph_snap_context *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) get_oldest_context(struct inode *inode, struct ceph_writeback_ctl *ctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) struct ceph_snap_context *page_snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) struct ceph_snap_context *snapc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) struct ceph_cap_snap *capsnap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) dout(" cap_snap %p snapc %p has %d dirty pages\n", capsnap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) capsnap->context, capsnap->dirty_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) if (!capsnap->dirty_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) /* get i_size, truncate_{seq,size} for page_snapc? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) if (snapc && capsnap->context != page_snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) if (ctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) if (capsnap->writing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) ctl->i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) ctl->size_stable = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) ctl->i_size = capsnap->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) ctl->size_stable = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) ctl->truncate_size = capsnap->truncate_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) ctl->truncate_seq = capsnap->truncate_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) ctl->head_snapc = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) if (snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) snapc = ceph_get_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) if (!page_snapc ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) page_snapc == snapc ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) page_snapc->seq > snapc->seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) if (!snapc && ci->i_wrbuffer_ref_head) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) snapc = ceph_get_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) dout(" head snapc %p has %d dirty pages\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) snapc, ci->i_wrbuffer_ref_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) if (ctl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) ctl->i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) ctl->truncate_size = ci->i_truncate_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) ctl->truncate_seq = ci->i_truncate_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) ctl->size_stable = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) ctl->head_snapc = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) return snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) static u64 get_writepages_data_length(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) struct page *page, u64 start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) struct ceph_snap_context *snapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) struct ceph_cap_snap *capsnap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) u64 end = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) if (snapc != ci->i_head_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) if (capsnap->context == snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) if (!capsnap->writing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) end = capsnap->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) found = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) WARN_ON(!found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) if (end > page_offset(page) + PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) end = page_offset(page) + PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) return end > start ? end - start : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) * Write a single page, but leave the page locked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) * If we get a write error, mark the mapping for error, but still adjust the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) * dirty page accounting (i.e., page is no longer dirty).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) struct inode *inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) struct ceph_snap_context *snapc, *oldest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) loff_t page_off = page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) loff_t len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) struct ceph_writeback_ctl ceph_wbc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) struct ceph_osd_client *osdc = &fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) dout("writepage %p idx %lu\n", page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) /* verify this is a writeable snap context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) snapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) if (!snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) dout("writepage %p page %p not dirty?\n", inode, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) oldest = get_oldest_context(inode, &ceph_wbc, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) if (snapc->seq > oldest->seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) dout("writepage %p page %p snapc %p not writeable - noop\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) inode, page, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) /* we should only noop if called by kswapd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) WARN_ON(!(current->flags & PF_MEMALLOC));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) redirty_page_for_writepage(wbc, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) /* is this a partial page at end of file? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) if (page_off >= ceph_wbc.i_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) dout("%p page eof %llu\n", page, ceph_wbc.i_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) if (ceph_wbc.i_size < page_off + len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) len = ceph_wbc.i_size - page_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) dout("writepage %p page %p index %lu on %llu~%llu snapc %p seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) inode, page, page->index, page_off, len, snapc, snapc->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) if (atomic_long_inc_return(&fsc->writeback_count) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) CONGESTION_ON_THRESH(fsc->mount_options->congestion_kb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) set_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) set_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) req = ceph_osdc_new_request(osdc, &ci->i_layout, ceph_vino(inode), page_off, &len, 0, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE, snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) ceph_wbc.truncate_seq, ceph_wbc.truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) redirty_page_for_writepage(wbc, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) return PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) /* it may be a short write due to an object boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) WARN_ON_ONCE(len > PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) err = ceph_osdc_start_request(osdc, req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) err = ceph_osdc_wait_request(osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) req->r_end_latency, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) if (err == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) err = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) struct writeback_control tmp_wbc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) if (!wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) wbc = &tmp_wbc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) if (err == -ERESTARTSYS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) /* killed by SIGKILL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) dout("writepage interrupted page %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) redirty_page_for_writepage(wbc, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) if (err == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) dout("writepage setting page/mapping error %d %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) err, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) mapping_set_error(&inode->i_data, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) wbc->pages_skipped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) dout("writepage cleaned page %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) err = 0; /* vfs expects us to return 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) page->private = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) ClearPagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) ceph_put_snap_context(snapc); /* page's reference */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) if (atomic_long_dec_return(&fsc->writeback_count) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) static int ceph_writepage(struct page *page, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) struct inode *inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) BUG_ON(!inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) err = writepage_nounlock(page, wbc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) if (err == -ERESTARTSYS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) /* direct memory reclaimer was killed by SIGKILL. return 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) * to prevent caller from setting mapping/page error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) * async writeback completion handler.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) * If we get an error, set the mapping error bit, but not the individual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) * page error bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) static void writepages_finish(struct ceph_osd_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) struct inode *inode = req->r_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) struct ceph_osd_data *osd_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) int num_pages, total_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) int rc = req->r_result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) struct ceph_snap_context *snapc = req->r_snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) bool remove_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) dout("writepages_finish %p rc %d\n", inode, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) if (rc < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) mapping_set_error(mapping, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) ceph_set_error_write(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) if (rc == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) ceph_clear_error_write(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) req->r_end_latency, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) * We lost the cache cap, need to truncate the page before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) * it is unlocked, otherwise we'd truncate it later in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) * page truncation thread, possibly losing some data that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) * raced its way in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) remove_page = !(ceph_caps_issued(ci) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) /* clean all pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) for (i = 0; i < req->r_num_ops; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) if (req->r_ops[i].op != CEPH_OSD_OP_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) osd_data = osd_req_op_extent_osd_data(req, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) num_pages = calc_pages_for((u64)osd_data->alignment,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) (u64)osd_data->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) total_pages += num_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) for (j = 0; j < num_pages; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) page = osd_data->pages[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) BUG_ON(!page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) WARN_ON(!PageUptodate(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) if (atomic_long_dec_return(&fsc->writeback_count) <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) CONGESTION_OFF_THRESH(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) fsc->mount_options->congestion_kb))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) clear_bdi_congested(inode_to_bdi(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) BLK_RW_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) ceph_put_snap_context(page_snap_context(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) page->private = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) ClearPagePrivate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) dout("unlocking %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) end_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) if (remove_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) generic_error_remove_page(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) inode, osd_data->length, rc >= 0 ? num_pages : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) release_pages(osd_data->pages, num_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) osd_data = osd_req_op_extent_osd_data(req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) if (osd_data->pages_from_pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) mempool_free(osd_data->pages, ceph_wb_pagevec_pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) kfree(osd_data->pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) * initiate async writeback
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) static int ceph_writepages_start(struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) struct inode *inode = mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) struct ceph_vino vino = ceph_vino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) pgoff_t index, start_index, end = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) struct ceph_snap_context *snapc = NULL, *last_snapc = NULL, *pgsnapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) struct pagevec pvec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) int rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) unsigned int wsize = i_blocksize(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) struct ceph_osd_request *req = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) struct ceph_writeback_ctl ceph_wbc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) bool should_loop, range_whole = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) bool done = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) dout("writepages_start %p (mode=%s)\n", inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) if (ci->i_wrbuffer_ref > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) pr_warn_ratelimited(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) "writepage_start %p %lld forced umount\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) inode, ceph_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) mapping_set_error(mapping, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) return -EIO; /* we're in a forced umount, don't write! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) if (fsc->mount_options->wsize < wsize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) wsize = fsc->mount_options->wsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) pagevec_init(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) start_index = wbc->range_cyclic ? mapping->writeback_index : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) index = start_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) /* find oldest snap context with dirty data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) snapc = get_oldest_context(inode, &ceph_wbc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) if (!snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) /* hmm, why does writepages get called when there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) is no dirty data? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) dout(" no snap context with dirty data?\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) dout(" oldest snapc is %p seq %lld (%d snaps)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) snapc, snapc->seq, snapc->num_snaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) should_loop = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) if (ceph_wbc.head_snapc && snapc != last_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) /* where to start/end? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) if (wbc->range_cyclic) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) index = start_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) end = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) if (index > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) should_loop = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) dout(" cyclic, start at %lu\n", index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) index = wbc->range_start >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) end = wbc->range_end >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) range_whole = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) dout(" not cyclic, %lu to %lu\n", index, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) } else if (!ceph_wbc.head_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) /* Do not respect wbc->range_{start,end}. Dirty pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * in that range can be associated with newer snapc.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * They are not writeable until all dirty pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) * associated with 'snapc' have been written */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) if (index > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) should_loop = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) dout(" non-head snapc, range whole\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) ceph_put_snap_context(last_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) last_snapc = snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) while (!done && index <= end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) int num_ops = 0, op_idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) unsigned i, pvec_pages, max_pages, locked_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) struct page **pages = NULL, **data_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) pgoff_t strip_unit_end = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) u64 offset = 0, len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) bool from_pool = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) max_pages = wsize >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) get_more_pages:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) pvec_pages = pagevec_lookup_range_tag(&pvec, mapping, &index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) end, PAGECACHE_TAG_DIRTY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) dout("pagevec_lookup_range_tag got %d\n", pvec_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) if (!pvec_pages && !locked_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) for (i = 0; i < pvec_pages && locked_pages < max_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) page = pvec.pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) dout("? %p idx %lu\n", page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) if (locked_pages == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) lock_page(page); /* first page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) else if (!trylock_page(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) /* only dirty pages, or our accounting breaks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (unlikely(!PageDirty(page)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) unlikely(page->mapping != mapping)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) dout("!dirty or !mapping %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) /* only if matching snap context */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) pgsnapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) if (pgsnapc != snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) dout("page snapc %p %lld != oldest %p %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) pgsnapc, pgsnapc->seq, snapc, snapc->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) if (!should_loop &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) !ceph_wbc.head_snapc &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) wbc->sync_mode != WB_SYNC_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) should_loop = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) if (page_offset(page) >= ceph_wbc.i_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) dout("%p page eof %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) page, ceph_wbc.i_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) if ((ceph_wbc.size_stable ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) page_offset(page) >= i_size_read(inode)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) clear_page_dirty_for_io(page))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) mapping->a_ops->invalidatepage(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) 0, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) if (strip_unit_end && (page->index > strip_unit_end)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) dout("end of strip unit %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) if (PageWriteback(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) if (wbc->sync_mode == WB_SYNC_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) dout("%p under writeback\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) dout("waiting on writeback %p\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) if (!clear_page_dirty_for_io(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) dout("%p !clear_page_dirty_for_io\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) * We have something to write. If this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * the first locked page this time through,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) * calculate max possible write size and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) * allocate a page array
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) if (locked_pages == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) u64 objnum;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) u64 objoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) u32 xlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) /* prepare async write request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) offset = (u64)page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) ceph_calc_file_object_mapping(&ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) offset, wsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) &objnum, &objoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) &xlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) len = xlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) num_ops = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) strip_unit_end = page->index +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) ((len - 1) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) BUG_ON(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) max_pages = calc_pages_for(0, (u64)len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) pages = kmalloc_array(max_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) sizeof(*pages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) if (!pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) from_pool = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) BUG_ON(!pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) } else if (page->index !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) (offset + len) >> PAGE_SHIFT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) if (num_ops >= (from_pool ? CEPH_OSD_SLAB_OPS :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) CEPH_OSD_MAX_OPS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) redirty_page_for_writepage(wbc, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) num_ops++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) offset = (u64)page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) /* note position of first page in pvec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) dout("%p will write page %p idx %lu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) inode, page, page->index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) if (atomic_long_inc_return(&fsc->writeback_count) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) CONGESTION_ON_THRESH(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) fsc->mount_options->congestion_kb)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) set_bdi_congested(inode_to_bdi(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) BLK_RW_ASYNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) pages[locked_pages++] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) pvec.pages[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) len += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) /* did we get anything? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) if (!locked_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) goto release_pvec_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) if (i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) unsigned j, n = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) /* shift unused page to beginning of pvec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) for (j = 0; j < pvec_pages; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) if (!pvec.pages[j])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) if (n < j)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) pvec.pages[n] = pvec.pages[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) n++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) pvec.nr = n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) if (pvec_pages && i == pvec_pages &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) locked_pages < max_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) dout("reached end pvec, trying for more\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) goto get_more_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) new_request:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) offset = page_offset(pages[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) len = wsize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) req = ceph_osdc_new_request(&fsc->client->osdc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) &ci->i_layout, vino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) offset, &len, 0, num_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) snapc, ceph_wbc.truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) ceph_wbc.truncate_size, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) req = ceph_osdc_new_request(&fsc->client->osdc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) &ci->i_layout, vino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) offset, &len, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) min(num_ops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) CEPH_OSD_SLAB_OPS),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) CEPH_OSD_OP_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) snapc, ceph_wbc.truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) ceph_wbc.truncate_size, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) BUG_ON(IS_ERR(req));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) BUG_ON(len < page_offset(pages[locked_pages - 1]) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) PAGE_SIZE - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) req->r_callback = writepages_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) /* Format the osd request message and submit the write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) data_pages = pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) op_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) for (i = 0; i < locked_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) u64 cur_offset = page_offset(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) if (offset + len != cur_offset) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) if (op_idx + 1 == req->r_num_ops)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) osd_req_op_extent_dup_last(req, op_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) cur_offset - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) dout("writepages got pages at %llu~%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) offset, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) osd_req_op_extent_osd_data_pages(req, op_idx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) data_pages, len, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) from_pool, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) osd_req_op_extent_update(req, op_idx, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) offset = cur_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) data_pages = pages + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) op_idx++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) set_page_writeback(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) len += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) if (ceph_wbc.size_stable) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) len = min(len, ceph_wbc.i_size - offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) } else if (i == locked_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) /* writepages_finish() clears writeback pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) * according to the data length, so make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) * data length covers all locked pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) u64 min_len = len + 1 - PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) len = get_writepages_data_length(inode, pages[i - 1],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) offset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) len = max(len, min_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) dout("writepages got pages at %llu~%llu\n", offset, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 0, from_pool, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) osd_req_op_extent_update(req, op_idx, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) BUG_ON(op_idx + 1 != req->r_num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) from_pool = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) if (i < locked_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) BUG_ON(num_ops <= req->r_num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) num_ops -= req->r_num_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) locked_pages -= i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) /* allocate new pages array for next request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) data_pages = pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) pages = kmalloc_array(locked_pages, sizeof(*pages),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) if (!pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) from_pool = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) pages = mempool_alloc(ceph_wb_pagevec_pool, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) BUG_ON(!pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) memcpy(pages, data_pages + i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) locked_pages * sizeof(*pages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) memset(data_pages + i, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) locked_pages * sizeof(*pages));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) BUG_ON(num_ops != req->r_num_ops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) index = pages[i - 1]->index + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) /* request message now owns the pages array */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) pages = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) rc = ceph_osdc_start_request(&fsc->client->osdc, req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) BUG_ON(rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) req = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) wbc->nr_to_write -= i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) goto new_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) * We stop writing back only if we are not doing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) * integrity sync. In case of integrity sync we have to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) * keep going until we have written all the pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) * we tagged for writeback prior to entering this loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) if (wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) done = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) release_pvec_pages:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) dout("pagevec_release on %d pages (%p)\n", (int)pvec.nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) pvec.nr ? pvec.pages[0] : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) if (should_loop && !done) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) /* more to do; loop back to beginning of file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) dout("writepages looping back to beginning of file\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) end = start_index - 1; /* OK even when start_index == 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) /* to write dirty pages associated with next snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) * we need to wait until current writes complete */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) if (wbc->sync_mode != WB_SYNC_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) start_index == 0 && /* all dirty pages were checked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) !ceph_wbc.head_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) unsigned i, nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) while ((index <= end) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) (nr = pagevec_lookup_tag(&pvec, mapping, &index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) PAGECACHE_TAG_WRITEBACK))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) for (i = 0; i < nr; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) page = pvec.pages[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) if (page_snap_context(page) != snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) pagevec_release(&pvec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) start_index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) mapping->writeback_index = index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) ceph_put_snap_context(last_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) dout("writepages dend - startone, rc = %d\n", rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) * See if a given @snapc is either writeable, or already written.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) static int context_is_writeable_or_written(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) struct ceph_snap_context *snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) struct ceph_snap_context *oldest = get_oldest_context(inode, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) int ret = !oldest || snapc->seq <= oldest->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) * ceph_find_incompatible - find an incompatible context and return it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) * @page: page being dirtied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) * We are only allowed to write into/dirty a page if the page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) * clean, or already dirty within the same snap context. Returns a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) * conflicting context if there is one, NULL if there isn't, or a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) * negative error code on other errors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) * Must be called with page lock held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) static struct ceph_snap_context *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) ceph_find_incompatible(struct page *page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) struct inode *inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) dout(" page %p forced umount\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) return ERR_PTR(-EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) struct ceph_snap_context *snapc, *oldest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) snapc = page_snap_context(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) if (!snapc || snapc == ci->i_head_snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) * this page is already dirty in another (older) snap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) * context! is it writeable now?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) oldest = get_oldest_context(inode, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) if (snapc->seq > oldest->seq) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) /* not writeable -- return it for the caller to deal with */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) dout(" page %p snapc %p not current or oldest\n", page, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) return ceph_get_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) ceph_put_snap_context(oldest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) /* yay, writeable, do it now (without dropping page lock) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) dout(" page %p snapc %p not current, but oldest\n", page, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) if (clear_page_dirty_for_io(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) int r = writepage_nounlock(page, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) return ERR_PTR(r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) * prep_noread_page - prep a page for writing without reading first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) * @page: page being prepared
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) * @pos: starting position for the write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * @len: length of write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) * In some cases, write_begin doesn't need to read at all:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) * - full page write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) * - file is currently zero-length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) * - write that lies in a page that is completely beyond EOF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) * - write that covers the the page from start to EOF or beyond it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) * If any of these criteria are met, then zero out the unwritten parts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) * of the page and return true. Otherwise, return false.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) static bool skip_page_read(struct page *page, loff_t pos, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) struct inode *inode = page->mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) loff_t i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) size_t offset = offset_in_page(pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /* Full page write */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) if (offset == 0 && len >= PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) /* pos beyond last page in the file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) if (pos - offset >= i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) goto zero_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) /* write that covers the whole page from start to EOF or beyond it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) if (offset == 0 && (pos + len) >= i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) goto zero_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) zero_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) zero_user_segments(page, 0, offset, offset + len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) * We are only allowed to write into/dirty the page if the page is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) * clean, or already dirty within the same snap context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) static int ceph_write_begin(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) loff_t pos, unsigned len, unsigned flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) struct page **pagep, void **fsdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) pgoff_t index = pos >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) int r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) page = grab_cache_page_write_begin(mapping, index, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) r = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) snapc = ceph_find_incompatible(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) if (snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) if (IS_ERR(snapc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) r = PTR_ERR(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) ceph_queue_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) r = wait_event_killable(ci->i_cap_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) context_is_writeable_or_written(inode, snapc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) if (r != 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) dout(" page %p already uptodate\n", page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) /* No need to read in some cases */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) if (skip_page_read(page, pos, len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) * We need to read it. If we get back -EINPROGRESS, then the page was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) * handed off to fscache and it will be unlocked when the read completes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) * Refind the page in that case so we can reacquire the page lock. Otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) * we got a hard error or the read was completed synchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) r = ceph_do_readpage(file, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) if (r != -EINPROGRESS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) if (r < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) *pagep = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) * we don't do anything in here that simple_write_end doesn't do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) * except adjust dirty page accounting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) static int ceph_write_end(struct file *file, struct address_space *mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) loff_t pos, unsigned len, unsigned copied,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) struct page *page, void *fsdata)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) bool check_cap = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) inode, page, (int)pos, (int)copied, (int)len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) /* zero the stale part of the page if we did a short copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) if (!PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) if (copied < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) copied = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) /* did file size increase? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) if (pos+copied > i_size_read(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) check_cap = ceph_inode_set_size(inode, pos+copied);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) if (check_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) return copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) * we set .direct_IO to indicate direct io is supported, but since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) * intercept O_DIRECT reads and writes early, this function should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) * never get called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) static ssize_t ceph_direct_io(struct kiocb *iocb, struct iov_iter *iter)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) const struct address_space_operations ceph_aops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) .readpage = ceph_readpage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) .readpages = ceph_readpages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) .writepage = ceph_writepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) .writepages = ceph_writepages_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) .write_begin = ceph_write_begin,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) .write_end = ceph_write_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) .set_page_dirty = ceph_set_page_dirty,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) .invalidatepage = ceph_invalidatepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) .releasepage = ceph_releasepage,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) .direct_IO = ceph_direct_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) static void ceph_block_sigs(sigset_t *oldset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) sigset_t mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) siginitsetinv(&mask, sigmask(SIGKILL));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) sigprocmask(SIG_BLOCK, &mask, oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) static void ceph_restore_sigs(sigset_t *oldset)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) sigprocmask(SIG_SETMASK, oldset, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * vm ops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) static vm_fault_t ceph_filemap_fault(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) struct ceph_file_info *fi = vma->vm_file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) struct page *pinned_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) int want, got, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) sigset_t oldset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) vm_fault_t ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) ceph_block_sigs(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) inode, ceph_vinop(inode), off, (size_t)PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) if (fi->fmode & CEPH_FILE_MODE_LAZY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) want = CEPH_CAP_FILE_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) &got, &pinned_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) goto out_restore;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) ci->i_inline_version == CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) ceph_add_rw_context(fi, &rw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) ret = filemap_fault(vmf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) ceph_del_rw_context(fi, &rw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) inode, off, (size_t)PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) ceph_cap_string(got), ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) err = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) if (pinned_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) put_page(pinned_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) if (err != -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) goto out_restore;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) /* read inline data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) if (off >= PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) /* does not support inline data > PAGE_SIZE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) struct page *page = find_or_create_page(mapping, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) mapping_gfp_constraint(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) ~__GFP_FS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) ret = VM_FAULT_OOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) goto out_inline;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) err = __ceph_do_getattr(inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) CEPH_STAT_CAP_INLINE_DATA, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) if (err < 0 || off >= i_size_read(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) ret = vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) goto out_inline;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) if (err < PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) zero_user_segment(page, err, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) vmf->page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) out_inline:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) inode, off, (size_t)PAGE_SIZE, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) out_restore:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) ceph_restore_sigs(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) ret = vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) * Reuse write_begin here for simplicity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) struct vm_area_struct *vma = vmf->vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) struct inode *inode = file_inode(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) struct ceph_file_info *fi = vma->vm_file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) struct ceph_cap_flush *prealloc_cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) struct page *page = vmf->page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) loff_t off = page_offset(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) loff_t size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) size_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) int want, got, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) sigset_t oldset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) vm_fault_t ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) prealloc_cf = ceph_alloc_cap_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) if (!prealloc_cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) return VM_FAULT_OOM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) sb_start_pagefault(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) ceph_block_sigs(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) if (ci->i_inline_version != CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) struct page *locked_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) if (off == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) locked_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) err = ceph_uninline_data(vma->vm_file, locked_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) if (locked_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) unlock_page(locked_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) if (off + PAGE_SIZE <= size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) len = size & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) inode, ceph_vinop(inode), off, len, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) if (fi->fmode & CEPH_FILE_MODE_LAZY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) want = CEPH_CAP_FILE_BUFFER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) &got, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) goto out_free;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) dout("page_mkwrite %p %llu~%zd got cap refs on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) inode, off, len, ceph_cap_string(got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) /* Update time before taking page lock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) file_update_time(vma->vm_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) inode_inc_iversion_raw(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) if (page_mkwrite_check_truncate(page, inode) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) ret = VM_FAULT_NOPAGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) snapc = ceph_find_incompatible(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) if (!snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) /* success. we'll keep the page locked. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) set_page_dirty(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) ret = VM_FAULT_LOCKED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) if (IS_ERR(snapc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) ret = VM_FAULT_SIGBUS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) ceph_queue_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) err = wait_event_killable(ci->i_cap_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) context_is_writeable_or_written(inode, snapc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) } while (err == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) if (ret == VM_FAULT_LOCKED ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) ci->i_inline_version != CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) int dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) ci->i_inline_version = CEPH_INLINE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) &prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) if (dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) __mark_inode_dirty(inode, dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) dout("page_mkwrite %p %llu~%zd dropping cap refs on %s ret %x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) inode, off, len, ceph_cap_string(got), ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) out_free:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) ceph_restore_sigs(&oldset);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) sb_end_pagefault(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) ceph_free_cap_flush(prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) ret = vmf_error(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) char *data, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) struct address_space *mapping = inode->i_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) if (locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) page = locked_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) if (i_size_read(inode) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) page = find_or_create_page(mapping, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) mapping_gfp_constraint(mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) ~__GFP_FS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) dout("fill_inline_data %p %llx.%llx len %zu locked_page %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) inode, ceph_vinop(inode), len, locked_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) void *kaddr = kmap_atomic(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) memcpy(kaddr, data, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) kunmap_atomic(kaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) if (page != locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) if (len < PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) zero_user_segment(page, len, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) SetPageUptodate(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) int ceph_uninline_data(struct file *filp, struct page *locked_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) struct inode *inode = file_inode(filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) u64 len, inline_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) bool from_pagecache = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) inline_version = ci->i_inline_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) dout("uninline_data %p %llx.%llx inline_version %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) inode, ceph_vinop(inode), inline_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) if (inline_version == 1 || /* initial version, no data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) inline_version == CEPH_INLINE_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) if (locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) page = locked_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) WARN_ON(!PageUptodate(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) } else if (ceph_caps_issued(ci) &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) page = find_get_page(inode->i_mapping, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) from_pagecache = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) lock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) len = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) if (len > PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) len = PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) page = __page_cache_alloc(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) err = __ceph_do_getattr(inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) CEPH_STAT_CAP_INLINE_DATA, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) /* no inline data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) if (err == -ENODATA)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) len = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) ceph_vino(inode), 0, &len, 0, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) NULL, 0, 0, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) err = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) err = ceph_osdc_wait_request(&fsc->client->osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) ceph_vino(inode), 0, &len, 1, 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) NULL, ci->i_truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) ci->i_truncate_size, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) err = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) osd_req_op_extent_osd_data_pages(req, 1, &page, len, 0, false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) __le64 xattr_buf = cpu_to_le64(inline_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) err = osd_req_op_xattr_init(req, 0, CEPH_OSD_OP_CMPXATTR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) "inline_version", &xattr_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) sizeof(xattr_buf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) CEPH_OSD_CMPXATTR_OP_GT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) CEPH_OSD_CMPXATTR_MODE_U64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) goto out_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) char xattr_buf[32];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) int xattr_len = snprintf(xattr_buf, sizeof(xattr_buf),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) "%llu", inline_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) err = osd_req_op_xattr_init(req, 2, CEPH_OSD_OP_SETXATTR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) "inline_version",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) xattr_buf, xattr_len, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) goto out_put;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) err = ceph_osdc_start_request(&fsc->client->osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) err = ceph_osdc_wait_request(&fsc->client->osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) req->r_end_latency, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) out_put:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) if (err == -ECANCELED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) if (page && page != locked_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) if (from_pagecache) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) } else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) __free_pages(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) dout("uninline_data %p %llx.%llx inline_version %llu = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) inode, ceph_vinop(inode), inline_version, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) static const struct vm_operations_struct ceph_vmops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) .fault = ceph_filemap_fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) .page_mkwrite = ceph_page_mkwrite,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) int ceph_mmap(struct file *file, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) struct address_space *mapping = file->f_mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) if (!mapping->a_ops->readpage)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) return -ENOEXEC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) file_accessed(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) vma->vm_ops = &ceph_vmops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891)
/*
 * Pool permission bits.  They are OR-ed together into the value that
 * __ceph_pool_perm_get() returns and that is cached in
 * struct ceph_pool_perm::perm.
 */
enum {
	POOL_READ	= 1 << 0,	/* pool is readable */
	POOL_WRITE	= 1 << 1,	/* pool is writable */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) s64 pool, struct ceph_string *pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) struct ceph_mds_client *mdsc = fsc->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) struct ceph_osd_request *rd_req = NULL, *wr_req = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) struct rb_node **p, *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) struct ceph_pool_perm *perm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) struct page **pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) size_t pool_ns_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) int err = 0, err2 = 0, have = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) down_read(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) p = &mdsc->pool_perm_tree.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) perm = rb_entry(*p, struct ceph_pool_perm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) if (pool < perm->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) p = &(*p)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) else if (pool > perm->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) p = &(*p)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) int ret = ceph_compare_string(pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) perm->pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) perm->pool_ns_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) p = &(*p)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) else if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) p = &(*p)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) have = perm->perm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) up_read(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) if (*p)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) if (pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) dout("__ceph_pool_perm_get pool %lld ns %.*s no perm cached\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) pool, (int)pool_ns->len, pool_ns->str);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) dout("__ceph_pool_perm_get pool %lld no perm cached\n", pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) down_write(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) p = &mdsc->pool_perm_tree.rb_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) while (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) parent = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) perm = rb_entry(parent, struct ceph_pool_perm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (pool < perm->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) p = &(*p)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) else if (pool > perm->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) p = &(*p)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) int ret = ceph_compare_string(pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) perm->pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) perm->pool_ns_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) p = &(*p)->rb_left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) else if (ret > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) p = &(*p)->rb_right;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) have = perm->perm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) if (*p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) up_write(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 1, false, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) if (!rd_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) rd_req->r_flags = CEPH_OSD_FLAG_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) osd_req_op_init(rd_req, 0, CEPH_OSD_OP_STAT, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) rd_req->r_base_oloc.pool = pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) if (pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) rd_req->r_base_oloc.pool_ns = ceph_get_string(pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) ceph_oid_printf(&rd_req->r_base_oid, "%llx.00000000", ci->i_vino.ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) err = ceph_osdc_alloc_messages(rd_req, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 1, false, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) if (!wr_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) wr_req->r_flags = CEPH_OSD_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) err = ceph_osdc_alloc_messages(wr_req, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) /* one page should be large enough for STAT data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) pages = ceph_alloc_page_vector(1, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) if (IS_ERR(pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) err = PTR_ERR(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) osd_req_op_raw_data_in_pages(rd_req, 0, pages, PAGE_SIZE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 0, false, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) wr_req->r_mtime = ci->vfs_inode.i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) err = ceph_osdc_wait_request(&fsc->client->osdc, rd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) if (!err2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) err2 = ceph_osdc_wait_request(&fsc->client->osdc, wr_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) if (err >= 0 || err == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) have |= POOL_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) else if (err != -EPERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) if (err == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) if (err2 == 0 || err2 == -EEXIST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) have |= POOL_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) else if (err2 != -EPERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) if (err2 == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) err = err2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) pool_ns_len = pool_ns ? pool_ns->len : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) perm = kmalloc(sizeof(*perm) + pool_ns_len + 1, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) if (!perm) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) perm->pool = pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) perm->perm = have;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) perm->pool_ns_len = pool_ns_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) if (pool_ns_len > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) memcpy(perm->pool_ns, pool_ns->str, pool_ns_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) perm->pool_ns[pool_ns_len] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) rb_link_node(&perm->node, parent, p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) rb_insert_color(&perm->node, &mdsc->pool_perm_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) up_write(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) ceph_osdc_put_request(rd_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) ceph_osdc_put_request(wr_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) err = have;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) if (pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) dout("__ceph_pool_perm_get pool %lld ns %.*s result = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) pool, (int)pool_ns->len, pool_ns->str, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) dout("__ceph_pool_perm_get pool %lld result = %d\n", pool, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) int ceph_pool_perm_check(struct inode *inode, int need)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) struct ceph_string *pool_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) s64 pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) int ret, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) if (ci->i_vino.snap != CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) * Pool permission check needs to write to the first object.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * But for snapshot, head of the first object may have alread
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) * been deleted. Skip check to avoid creating orphan object.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) if (ceph_test_mount_opt(ceph_inode_to_client(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) NOPOOLPERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) flags = ci->i_ceph_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) pool = ci->i_layout.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) check:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) if (flags & CEPH_I_POOL_PERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) if ((need & CEPH_CAP_FILE_RD) && !(flags & CEPH_I_POOL_RD)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) dout("ceph_pool_perm_check pool %lld no read perm\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) if ((need & CEPH_CAP_FILE_WR) && !(flags & CEPH_I_POOL_WR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) dout("ceph_pool_perm_check pool %lld no write perm\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) return -EPERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) ret = __ceph_pool_perm_get(ci, pool, pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) ceph_put_string(pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) flags = CEPH_I_POOL_PERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) if (ret & POOL_READ)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) flags |= CEPH_I_POOL_RD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) if (ret & POOL_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) flags |= CEPH_I_POOL_WR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) if (pool == ci->i_layout.pool_id &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) pool_ns == rcu_dereference_raw(ci->i_layout.pool_ns)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) ci->i_ceph_flags |= flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) pool = ci->i_layout.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) flags = ci->i_ceph_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) goto check;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) void ceph_pool_perm_destroy(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) struct ceph_pool_perm *perm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) struct rb_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) while (!RB_EMPTY_ROOT(&mdsc->pool_perm_tree)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) n = rb_first(&mdsc->pool_perm_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) perm = rb_entry(n, struct ceph_pool_perm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) rb_erase(n, &mdsc->pool_perm_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) kfree(perm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) }