^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #include <linux/ceph/ceph_debug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #include <linux/ceph/striper.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/mount.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/namei.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/writeback.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/falloc.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/iversion.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/ktime.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include "super.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include "mds_client.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include "cache.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include "io.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include "metric.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) static __le32 ceph_flags_sys2wire(u32 flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) u32 wire_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) switch (flags & O_ACCMODE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) case O_RDONLY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) wire_flags |= CEPH_O_RDONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) case O_WRONLY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) wire_flags |= CEPH_O_WRONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) case O_RDWR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) wire_flags |= CEPH_O_RDWR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) flags &= ~O_ACCMODE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define ceph_sys2wire(a) if (flags & a) { wire_flags |= CEPH_##a; flags &= ~a; }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) ceph_sys2wire(O_CREAT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) ceph_sys2wire(O_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) ceph_sys2wire(O_TRUNC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) ceph_sys2wire(O_DIRECTORY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) ceph_sys2wire(O_NOFOLLOW);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #undef ceph_sys2wire
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) if (flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) dout("unused open flags: %x\n", flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) return cpu_to_le32(wire_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * Ceph file operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * Implement basic open/close functionality, and implement
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * read/write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * We implement three modes of file I/O:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) * - buffered uses the generic_file_aio_{read,write} helpers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) * - synchronous is used when there is multi-client read/write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * sharing, avoids the page cache, and synchronously waits for an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * ack from the OSD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * - direct io takes the variant of the sync path that references
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * user pages directly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * fsync() flushes and waits on dirty pages, but just queues metadata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * for writeback: since the MDS can recover size and mtime there is no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * need to wait for MDS acknowledgement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * How many pages to get in one call to iov_iter_get_pages(). This
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * determines the size of the on-stack array used as a buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) #define ITER_GET_BVECS_PAGES 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) static ssize_t __iter_get_bvecs(struct iov_iter *iter, size_t maxsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) struct bio_vec *bvecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) size_t size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) int bvec_idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) if (maxsize > iov_iter_count(iter))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) maxsize = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) while (size < maxsize) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) struct page *pages[ITER_GET_BVECS_PAGES];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) ssize_t bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) size_t start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) int idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) bytes = iov_iter_get_pages(iter, pages, maxsize - size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) ITER_GET_BVECS_PAGES, &start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) if (bytes < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) return size ?: bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) iov_iter_advance(iter, bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) size += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) for ( ; bytes; idx++, bvec_idx++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) struct bio_vec bv = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) .bv_page = pages[idx],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) .bv_len = min_t(int, bytes, PAGE_SIZE - start),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) .bv_offset = start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) bvecs[bvec_idx] = bv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) bytes -= bv.bv_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) start = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) return size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * iov_iter_get_pages() only considers one iov_iter segment, no matter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * what maxsize or maxpages are given. For ITER_BVEC that is a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * Attempt to get up to @maxsize bytes worth of pages from @iter.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * Return the number of bytes in the created bio_vec array, or an error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) static ssize_t iter_get_bvecs_alloc(struct iov_iter *iter, size_t maxsize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) struct bio_vec **bvecs, int *num_bvecs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) struct bio_vec *bv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) size_t orig_count = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ssize_t bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) int npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) iov_iter_truncate(iter, maxsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) npages = iov_iter_npages(iter, INT_MAX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) iov_iter_reexpand(iter, orig_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) * __iter_get_bvecs() may populate only part of the array -- zero it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) * out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) bv = kvmalloc_array(npages, sizeof(*bv), GFP_KERNEL | __GFP_ZERO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) if (!bv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) bytes = __iter_get_bvecs(iter, maxsize, bv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) if (bytes < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * No pages were pinned -- just free the array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) kvfree(bv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) return bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) *bvecs = bv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) *num_bvecs = npages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) return bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) static void put_bvecs(struct bio_vec *bvecs, int num_bvecs, bool should_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) for (i = 0; i < num_bvecs; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) if (bvecs[i].bv_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) if (should_dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) set_page_dirty_lock(bvecs[i].bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) put_page(bvecs[i].bv_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) kvfree(bvecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) * Prepare an open request. Preallocate ceph_cap to avoid an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) * inopportune ENOMEM later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) static struct ceph_mds_request *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) prepare_open_request(struct super_block *sb, int flags, int create_mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) int want_auth = USE_ANY_MDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) int op = (flags & O_CREAT) ? CEPH_MDS_OP_CREATE : CEPH_MDS_OP_OPEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) if (flags & (O_WRONLY|O_RDWR|O_CREAT|O_TRUNC))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) want_auth = USE_AUTH_MDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) req = ceph_mdsc_create_request(mdsc, op, want_auth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) if (IS_ERR(req))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) req->r_fmode = ceph_flags_to_mode(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) req->r_args.open.flags = ceph_flags_sys2wire(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) req->r_args.open.mode = cpu_to_le32(create_mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) static int ceph_init_file_info(struct inode *inode, struct file *file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) int fmode, bool isdir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) struct ceph_file_info *fi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) dout("%s %p %p 0%o (%s)\n", __func__, inode, file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) inode->i_mode, isdir ? "dir" : "regular");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) BUG_ON(inode->i_fop->release != ceph_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) if (isdir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) struct ceph_dir_file_info *dfi =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) kmem_cache_zalloc(ceph_dir_file_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) if (!dfi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) file->private_data = dfi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) fi = &dfi->file_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) dfi->next_offset = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) dfi->readdir_cache_idx = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) fi = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) if (!fi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) file->private_data = fi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) ceph_get_fmode(ci, fmode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) fi->fmode = fmode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) spin_lock_init(&fi->rw_contexts_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) INIT_LIST_HEAD(&fi->rw_contexts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) fi->filp_gen = READ_ONCE(ceph_inode_to_client(inode)->filp_gen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) * initialize private struct file data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) * if we fail, clean up by dropping fmode reference on the ceph_inode
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) switch (inode->i_mode & S_IFMT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) case S_IFREG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) ceph_fscache_register_inode_cookie(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) ceph_fscache_file_set_cookie(inode, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) case S_IFDIR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) ret = ceph_init_file_info(inode, file, fmode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) S_ISDIR(inode->i_mode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) case S_IFLNK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) dout("init_file %p %p 0%o (symlink)\n", inode, file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) inode->i_mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) dout("init_file %p %p 0%o (special)\n", inode, file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) inode->i_mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) * we need to drop the open ref now, since we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) * have .release set to ceph_release.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) BUG_ON(inode->i_fop->release == ceph_release);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) /* call the proper open fop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) ret = inode->i_fop->open(inode, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) * try renew caps after session gets killed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) int ceph_renew_caps(struct inode *inode, int fmode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) int err, flags, wanted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) __ceph_touch_fmode(ci, mdsc, fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) wanted = __ceph_caps_file_wanted(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) if (__ceph_is_any_real_caps(ci) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) (!(wanted & CEPH_CAP_ANY_WR) || ci->i_auth_cap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) int issued = __ceph_caps_issued(ci, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) dout("renew caps %p want %s issued %s updating mds_wanted\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) inode, ceph_cap_string(wanted), ceph_cap_string(issued));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) ceph_check_caps(ci, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) if ((wanted & CEPH_CAP_FILE_RD) && (wanted & CEPH_CAP_FILE_WR))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) flags = O_RDWR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) else if (wanted & CEPH_CAP_FILE_RD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) flags = O_RDONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) else if (wanted & CEPH_CAP_FILE_WR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) flags = O_WRONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) #ifdef O_LAZY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) if (wanted & CEPH_CAP_FILE_LAZYIO)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) flags |= O_LAZY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) req = prepare_open_request(inode->i_sb, flags, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) err = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) req->r_num_caps = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) err = ceph_mdsc_do_request(mdsc, NULL, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) dout("renew caps %p open result=%d\n", inode, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) return err < 0 ? err : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) * If we already have the requisite capabilities, we can satisfy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) * the open request locally (no need to request new caps from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) * MDS). We do, however, need to inform the MDS (asynchronously)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * if our wanted caps set expands.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) int ceph_open(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) struct ceph_mds_client *mdsc = fsc->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) struct ceph_file_info *fi = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) int flags, fmode, wanted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) if (fi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) dout("open file %p is already opened\n", file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) /* filter out O_CREAT|O_EXCL; vfs did that already. yuck. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) flags = file->f_flags & ~(O_CREAT|O_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) if (S_ISDIR(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) flags = O_DIRECTORY; /* mds likes to know */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) dout("open inode %p ino %llx.%llx file %p flags %d (%d)\n", inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) ceph_vinop(inode), file, flags, file->f_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) fmode = ceph_flags_to_mode(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) wanted = ceph_caps_for_mode(fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) /* snapped files are read-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) /* trivially open snapdir */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) if (ceph_snap(inode) == CEPH_SNAPDIR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) return ceph_init_file(inode, file, fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) * No need to block if we have caps on the auth MDS (for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) * write) or any MDS (for read). Update wanted set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) * asynchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) if (__ceph_is_any_real_caps(ci) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) int mds_wanted = __ceph_caps_mds_wanted(ci, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) int issued = __ceph_caps_issued(ci, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) dout("open %p fmode %d want %s issued %s using existing\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) inode, fmode, ceph_cap_string(wanted),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) ceph_cap_string(issued));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) __ceph_touch_fmode(ci, mdsc, fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) /* adjust wanted? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) if ((issued & wanted) != wanted &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) (mds_wanted & wanted) != wanted &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) ceph_snap(inode) != CEPH_SNAPDIR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) ceph_check_caps(ci, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) return ceph_init_file(inode, file, fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) } else if (ceph_snap(inode) != CEPH_NOSNAP &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) (ci->i_snap_caps & wanted) == wanted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) __ceph_touch_fmode(ci, mdsc, fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) return ceph_init_file(inode, file, fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) dout("open fmode %d wants %s\n", fmode, ceph_cap_string(wanted));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) req = prepare_open_request(inode->i_sb, flags, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) err = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) req->r_num_caps = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) err = ceph_mdsc_do_request(mdsc, NULL, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) err = ceph_init_file(inode, file, req->r_fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) dout("open result=%d on %llx.%llx\n", err, ceph_vinop(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) /* Clone the layout from a synchronous create, if the dir now has Dc caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) cache_file_layout(struct inode *dst, struct inode *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) struct ceph_inode_info *cdst = ceph_inode(dst);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) struct ceph_inode_info *csrc = ceph_inode(src);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) spin_lock(&cdst->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) if ((__ceph_caps_issued(cdst, NULL) & CEPH_CAP_DIR_CREATE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) !ceph_file_layout_is_valid(&cdst->i_cached_layout)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) memcpy(&cdst->i_cached_layout, &csrc->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) sizeof(cdst->i_cached_layout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) rcu_assign_pointer(cdst->i_cached_layout.pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) ceph_try_get_string(csrc->i_layout.pool_ns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) spin_unlock(&cdst->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) * Try to set up an async create. We need caps, a file layout, and inode number,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) * and either a lease on the dentry or complete dir info. If any of those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) * criteria are not satisfied, then return false and the caller can go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) * synchronous.
 *
 * @dir:    parent directory inode in which the file would be created
 * @dentry: dentry for the new name
 * @lo:     out: copy of the directory's cached layout; its pool_ns is set
 *          from ceph_try_get_string(), and ceph_atomic_open() later passes
 *          it to ceph_put_string(). Written only on success.
 * @pino:   out: inode number obtained from ceph_get_deleg_ino().
 *          Written only on success.
 *
 * Returns the cap mask (CEPH_CAP_FILE_EXCL | CEPH_CAP_DIR_CREATE) after
 * calling ceph_take_cap_refs() on it, or 0 when any precondition fails.
 * All checks, the ino grab and the cap-ref grab are done while holding
 * ci->i_ceph_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) static int try_prep_async_create(struct inode *dir, struct dentry *dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) struct ceph_file_layout *lo, u64 *pino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) struct ceph_inode_info *ci = ceph_inode(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) struct ceph_dentry_info *di = ceph_dentry(dentry);
	/* got stays 0 (the "no async" answer) unless every check below passes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) int got = 0, want = CEPH_CAP_FILE_EXCL | CEPH_CAP_DIR_CREATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) u64 ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) /* No auth cap means no chance for Dc caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) if (!ci->i_auth_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) goto no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) /* Any delegated inos? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) if (xa_empty(&ci->i_auth_cap->session->s_delegated_inos))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) goto no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464)
	/* The directory must have a valid cached layout to hand to the new file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) if (!ceph_file_layout_is_valid(&ci->i_cached_layout))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) goto no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467)
	/* Every bit in want must currently be issued on the directory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) if ((__ceph_caps_issued(ci, NULL) & want) != want)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) goto no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470)
	/*
	 * Dentry still in lookup: require complete dir info and stamp the
	 * dentry with the directory's current i_shared_gen (under d_lock).
	 * Otherwise the generation already recorded on the dentry must match
	 * the directory's current one.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) if (d_in_lookup(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) if (!__ceph_dir_is_complete(ci))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) goto no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) di->lease_shared_gen = atomic_read(&ci->i_shared_gen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) } else if (atomic_read(&ci->i_shared_gen) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) READ_ONCE(di->lease_shared_gen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) goto no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
	/* Take an ino from the auth cap's session; 0 is treated as "none" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) ino = ceph_get_deleg_ino(ci->i_auth_cap->session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) if (!ino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) goto no_async;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
	/* Success: publish the ino, take cap refs and copy out the layout */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) *pino = ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) ceph_take_cap_refs(ci, want, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) memcpy(lo, &ci->i_cached_layout, sizeof(*lo));
	/* the memcpy copied the raw pool_ns pointer; replace it with the result of ceph_try_get_string() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) rcu_assign_pointer(lo->pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) ceph_try_get_string(ci->i_cached_layout.pool_ns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) got = want;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) no_async:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) return got;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
/*
 * Hand a delegated inode number back to the session behind @dir's auth cap.
 *
 * @dir: directory inode whose auth cap identifies the session
 * @ino: inode number to give back via ceph_restore_deleg_ino()
 *
 * If the directory currently has no auth cap, nothing is done. A failure of
 * ceph_restore_deleg_ino() is only logged with pr_warn(); nothing is returned
 * to the caller.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) static void restore_deleg_ino(struct inode *dir, u64 ino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) struct ceph_inode_info *ci = ceph_inode(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) struct ceph_mds_session *s = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501)
	/* Look up the session under i_ceph_lock; the restore itself runs after unlock */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) if (ci->i_auth_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) s = ceph_get_mds_session(ci->i_auth_cap->session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) if (s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) int err = ceph_restore_deleg_ino(s, ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) pr_warn("ceph: unable to restore delegated ino 0x%llx to session: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) ino, err);
	/* pairs with ceph_get_mds_session() above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514)
/*
 * Completion callback for an async create request; ceph_atomic_open()
 * installs it as req->r_callback.
 *
 * @mdsc: MDS client (not used in this function)
 * @req:  the completed create request
 *
 * The result is req->r_err if set, otherwise the result field of the reply
 * head. For -EJUKEBOX only the directory caps are released. For any other
 * result the value is recorded on the parent directory's mapping; on failure
 * the directory's "complete" state is cleared, the dentry is dropped and a
 * warning is logged. If the request has a target inode, the result is also
 * recorded on its mapping, CEPH_I_ASYNC_CREATE is cleared (waking waiters on
 * that bit) and ceph_kick_flushing_inode_caps() is called. The directory caps
 * are released on every path.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) static void ceph_async_create_cb(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) {
	/* r_err, when set, takes precedence over the result in the reply head */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) int result = req->r_err ? req->r_err :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) le32_to_cpu(req->r_reply_info.head->result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520)
	/* ceph_atomic_open() handles -EJUKEBOX from submission by retrying synchronously */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) if (result == -EJUKEBOX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) mapping_set_error(req->r_parent->i_mapping, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) if (result) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) struct dentry *dentry = req->r_dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) int pathlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) u64 base = 0;
	/* the path is built only for the warning below; it may be an ERR_PTR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) &base, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532)
	/* the create failed: stop treating the dir as complete and drop the dentry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) ceph_dir_clear_complete(req->r_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) if (!d_unhashed(dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) d_drop(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) /* FIXME: start returning I/O errors on all accesses? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) pr_warn("ceph: async create failure path=(%llx)%s result=%d!\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) base, IS_ERR(path) ? "<<bad>>" : path, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) ceph_mdsc_free_path(path, pathlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) if (req->r_target_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) u64 ino = ceph_vino(req->r_target_inode).ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546)
	/* the target inode should carry the delegated ino chosen for this request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) if (req->r_deleg_ino != ino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) pr_warn("%s: inode number mismatch! err=%d deleg_ino=0x%llx target=0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) __func__, req->r_err, req->r_deleg_ino, ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) mapping_set_error(req->r_target_inode->i_mapping, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551)
	/* under i_ceph_lock: clear the async-create flag, wake waiters on that bit, kick cap flushes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) if (ci->i_ceph_flags & CEPH_I_ASYNC_CREATE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) ci->i_ceph_flags &= ~CEPH_I_ASYNC_CREATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) wake_up_bit(&ci->i_ceph_flags, CEPH_ASYNC_CREATE_BIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) ceph_kick_flushing_inode_caps(req->r_session, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) pr_warn("%s: no req->r_target_inode for 0x%llx\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) req->r_deleg_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) out:
	/* reached on every path, including the -EJUKEBOX early exit */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) ceph_mdsc_release_dir_caps(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566)
/*
 * Set up the new inode locally for an async create that has just been
 * submitted, then finish opening the file.
 *
 * A ceph_mds_reply_inode / ceph_mds_reply_info_in pair is built on the stack
 * describing a brand-new file (delegated ino, current time, caller's
 * ownership, the layout in @lo) and passed to ceph_fill_inode(). On success
 * the inode is attached to @dentry and the open is completed with
 * finish_open().
 *
 * @dir:    parent directory inode
 * @dentry: dentry being created
 * @file:   file being opened; FMODE_CREATED is set on it on success
 * @mode:   requested mode bits for the new file
 * @req:    the submitted request; supplies r_deleg_ino, r_session and r_fmode
 * @as_ctx: ACL/security context passed to ceph_init_inode_acls()
 * @lo:     layout for the new file (filled in by try_prep_async_create())
 *
 * Returns PTR_ERR() of a failed ceph_get_inode(), the non-zero result of
 * ceph_fill_inode(), or otherwise the result of finish_open().
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) static int ceph_finish_async_create(struct inode *dir, struct dentry *dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) struct file *file, umode_t mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) struct ceph_acl_sec_ctx *as_ctx,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) struct ceph_file_layout *lo)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) char xattr_buf[4];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) struct ceph_mds_reply_inode in = { };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) struct ceph_mds_reply_info_in iinfo = { .in = &in };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) struct ceph_inode_info *ci = ceph_inode(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) struct timespec64 now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) struct ceph_string *pool_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) struct ceph_vino vino = { .ino = req->r_deleg_ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) .snap = CEPH_NOSNAP };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
	/* one timestamp is used for btime, ctime, mtime and atime below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) ktime_get_real_ts64(&now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) inode = ceph_get_inode(dentry->d_sb, vino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) if (IS_ERR(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) return PTR_ERR(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) iinfo.inline_version = CEPH_INLINE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) iinfo.change_attr = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) ceph_encode_timespec64(&iinfo.btime, &now);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594)
	/* four zeroed bytes of xattr data; presumably an encoded empty xattr set (TODO confirm) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) iinfo.xattr_len = ARRAY_SIZE(xattr_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) iinfo.xattr_data = xattr_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) memset(iinfo.xattr_data, 0, iinfo.xattr_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) in.ino = cpu_to_le64(vino.ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) in.snapid = cpu_to_le64(CEPH_NOSNAP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) in.version = cpu_to_le64(1); // ???
	/* all file caps, flagged as the auth cap, in the parent directory's snap realm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) in.cap.caps = in.cap.wanted = cpu_to_le32(CEPH_CAP_ALL_FILE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) in.cap.cap_id = cpu_to_le64(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) in.cap.realm = cpu_to_le64(ci->i_snap_realm->ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) in.cap.flags = CEPH_CAP_FLAG_AUTH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) in.ctime = in.mtime = in.atime = iinfo.btime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) in.truncate_seq = cpu_to_le32(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) in.truncate_size = cpu_to_le64(-1ULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) in.xattr_version = cpu_to_le64(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) in.uid = cpu_to_le32(from_kuid(&init_user_ns, current_fsuid()));
	/*
	 * Parent has setgid: the new inode takes the directory's gid. For a
	 * non-directory whose mode has both S_ISGID and S_IXGRP, S_ISGID is
	 * cleared when the caller is not in the directory's group and lacks
	 * CAP_FSETID with respect to the directory. Without setgid on the
	 * parent, the caller's fsgid is used.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) if (dir->i_mode & S_ISGID) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) in.gid = cpu_to_le32(from_kgid(&init_user_ns, dir->i_gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) /* Directories always inherit the setgid bit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) if (S_ISDIR(mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) mode |= S_ISGID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) !in_group_p(dir->i_gid) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) !capable_wrt_inode_uidgid(dir, CAP_FSETID))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) mode &= ~S_ISGID;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) in.gid = cpu_to_le32(from_kgid(&init_user_ns, current_fsgid()));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) in.mode = cpu_to_le32((u32)mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) in.nlink = cpu_to_le32(1);
	/* max_size starts out as one stripe unit of the layout */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) in.max_size = cpu_to_le64(lo->stripe_unit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) ceph_file_layout_to_legacy(lo, &in.layout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) /* lo is private, so pool_ns can't change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) pool_ns = rcu_dereference_raw(lo->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) if (pool_ns) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) iinfo.pool_ns_len = pool_ns->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) iinfo.pool_ns_data = pool_ns->str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636)
	/* ceph_fill_inode() is called with snap_rwsem held for read */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) down_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) ret = ceph_fill_inode(inode, NULL, &iinfo, NULL, req->r_session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) req->r_fmode, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) dout("%s failed to fill inode: %d\n", __func__, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) ceph_dir_clear_complete(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) if (!d_unhashed(dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) d_drop(dentry);
	/*
	 * NOTE(review): only an inode still marked I_NEW is discarded here;
	 * no iput() is visible for the non-I_NEW case. Confirm the reference
	 * from ceph_get_inode() is dropped elsewhere.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) if (inode->i_state & I_NEW)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) discard_new_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) struct dentry *dn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) dout("%s d_adding new inode 0x%llx to 0x%llx/%s\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) vino.ino, ceph_ino(dir), dentry->d_name.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) ceph_dir_clear_ordered(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) ceph_init_inode_acls(inode, as_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) if (inode->i_state & I_NEW) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) * If it's not I_NEW, then someone created this before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) * we got here. Assume the server is aware of it at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) * that point and don't worry about setting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) * CEPH_I_ASYNC_CREATE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) ceph_inode(inode)->i_ceph_flags = CEPH_I_ASYNC_CREATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) unlock_new_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) }
	/*
	 * Attach the inode when the dentry is still in lookup or negative.
	 * d_splice_alias() is expected to return NULL or this same dentry;
	 * anything else trips the WARN below.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) if (d_in_lookup(dentry) || d_really_is_negative(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) if (!d_unhashed(dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) d_drop(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) dn = d_splice_alias(inode, dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) WARN_ON_ONCE(dn && dn != dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) file->f_mode |= FMODE_CREATED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) ret = finish_open(file, dentry, ceph_open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) * Do a lookup + open with a single request. If we get a non-existent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) * file or symlink, return 1 so the VFS can retry.
 *
 * @dir:    parent directory inode
 * @dentry: dentry to look up / create
 * @file:   file being opened
 * @flags:  open flags; O_CREAT selects the create path
 * @mode:   mode for a newly created file
 *
 * With O_CREAT and the ASYNC_DIROPS mount option, an async create is tried
 * first via try_prep_async_create(). If submitting that request returns
 * -EJUKEBOX, the delegated ino is given back and the open is retried
 * synchronously.
 *
 * Early failures visible here: -ENAMETOOLONG for names longer than NAME_MAX,
 * -EDQUOT when the max-files quota is exceeded, and -ENOENT for a dentry
 * that is not in lookup when O_CREAT is not set. Otherwise the value
 * returned is an error from one of the helpers or the result of
 * finish_open() / finish_no_open().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) struct file *file, unsigned flags, umode_t mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) struct ceph_mds_client *mdsc = fsc->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) struct dentry *dn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) struct ceph_acl_sec_ctx as_ctx = {};
	/* cleared after an -EJUKEBOX so that the retry goes synchronous */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) int mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) dir, dentry, dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) d_unhashed(dentry) ? "unhashed" : "hashed", flags, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) if (dentry->d_name.len > NAME_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) return -ENAMETOOLONG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699)
	/* Create path: check the file quota, then prepare ACL and security contexts in as_ctx */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) if (flags & O_CREAT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) if (ceph_quota_is_max_files_exceeded(dir))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) return -EDQUOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) err = ceph_pre_init_acls(dir, &mode, &as_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) err = ceph_security_init_secctx(dentry, mode, &as_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) goto out_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) } else if (!d_in_lookup(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) /* If it's not being looked up, it's negative */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) /* do the open */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) req = prepare_open_request(dir->i_sb, flags, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) err = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) goto out_ctx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) req->r_dentry = dget(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) req->r_num_caps = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) if (ceph_security_xattr_wanted(dir))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) mask |= CEPH_CAP_XATTR_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) req->r_args.open.mask = cpu_to_le32(mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) req->r_parent = dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) if (flags & O_CREAT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) struct ceph_file_layout lo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) req->r_dentry_drop = CEPH_CAP_FILE_SHARED | CEPH_CAP_AUTH_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
	/* move the pagelist from as_ctx into the request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) if (as_ctx.pagelist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) req->r_pagelist = as_ctx.pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) as_ctx.pagelist = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) }
	/*
	 * try_prep_async_create() returns the dir caps it took references
	 * on (non-zero) or 0; a non-zero result selects the async path and
	 * also fills in lo and req->r_deleg_ino.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) if (try_async &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) (req->r_dir_caps =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) try_prep_async_create(dir, dentry, &lo,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) &req->r_deleg_ino))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) set_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) req->r_args.open.flags |= cpu_to_le32(CEPH_O_EXCL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) req->r_callback = ceph_async_create_cb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) err = ceph_mdsc_submit_request(mdsc, dir, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) if (!err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) err = ceph_finish_async_create(dir, dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) file, mode, req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) &as_ctx, &lo);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) } else if (err == -EJUKEBOX) {
	/*
	 * Fall back to a synchronous create: give back the delegated
	 * ino, drop this request and the pool_ns string, then retry.
	 * NOTE(review): as_ctx.pagelist was already moved into the
	 * request dropped here, so the retried request is built
	 * without r_pagelist. Confirm this is intended.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) restore_deleg_ino(dir, req->r_deleg_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) try_async = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) ceph_put_string(rcu_dereference_raw(lo.pool_ns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) }
	/* async path finished (success or other error): drop the pool_ns string set by try_prep_async_create() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) ceph_put_string(rcu_dereference_raw(lo.pool_ns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) goto out_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760)
	/* Synchronous path; the directory is passed along only for O_CREAT or O_TRUNC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) err = ceph_mdsc_do_request(mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) (flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) err = ceph_handle_snapdir(req, dentry, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) goto out_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768)
	/* a create whose reply head has is_dentry clear is handed to ceph_handle_notrace_create() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) err = ceph_handle_notrace_create(dir, dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) if (d_in_lookup(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) dn = ceph_finish_lookup(req, dentry, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) if (IS_ERR(dn))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) err = PTR_ERR(dn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) /* we were given a hashed negative dentry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) dn = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) goto out_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) if (dn || d_really_is_negative(dentry) || d_is_symlink(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) /* make vfs retry on splice, ENOENT, or symlink */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) dout("atomic_open finish_no_open on dn %p\n", dn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) err = finish_no_open(file, dn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) dout("atomic_open finish_open on dn %p\n", dn);
	/* a CREATE reply carrying has_create_ino is treated as a newly created file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) if (req->r_op == CEPH_MDS_OP_CREATE && req->r_reply_info.has_create_ino) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) struct inode *newino = d_inode(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) cache_file_layout(dir, newino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) ceph_init_inode_acls(newino, &as_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) file->f_mode |= FMODE_CREATED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) err = finish_open(file, dentry, ceph_open);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) out_req:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) out_ctx:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) ceph_release_acl_sec_ctx(&as_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) dout("atomic_open result=%d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804)
/*
 * Release the per-open state hanging off file->private_data.
 *
 * @inode: inode the file was opened on
 * @file:  file being released
 *
 * For a directory, private_data is treated as a ceph_dir_file_info: the
 * cached last_readdir request is put and last_name / dir_info are freed
 * before the structure itself. For anything else it is treated as a
 * ceph_file_info. In both cases the open mode is dropped with
 * ceph_put_fmode() and waiters on i_cap_wq are woken. Always returns 0.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) int ceph_release(struct inode *inode, struct file *file)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) if (S_ISDIR(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) struct ceph_dir_file_info *dfi = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) dout("release inode %p dir file %p\n", inode, file);
	/* the rw_contexts list is expected to be empty by the time the file is released */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) WARN_ON(!list_empty(&dfi->file_info.rw_contexts));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) ceph_put_fmode(ci, dfi->file_info.fmode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) if (dfi->last_readdir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) ceph_mdsc_put_request(dfi->last_readdir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) kfree(dfi->last_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) kfree(dfi->dir_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) kmem_cache_free(ceph_dir_file_cachep, dfi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) struct ceph_file_info *fi = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) dout("release inode %p regular file %p\n", inode, file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) WARN_ON(!list_empty(&fi->rw_contexts));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) ceph_put_fmode(ci, fi->fmode, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) kmem_cache_free(ceph_file_cachep, fi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) /* wake up anyone waiting for caps on this inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835)
/*
 * Retry-operation codes.
 * NOTE(review): presumably these are the values stored through the retry_op
 * argument of ceph_sync_read() and inspected by its caller; confirm against
 * the read path. They are distinct small integers (1, 2, 3), not bit flags.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) HAVE_RETRIED = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) CHECK_EOF = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) READ_INLINE = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) * Completely synchronous read and write methods. Direct from __user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) * buffer to osd, or directly to user pages (if O_DIRECT).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) * If the read spans object boundary, just do multiple reads. (That's not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) * atomic, but good enough for now.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) * If we get a short result from the OSD, check against i_size; we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) * only return a short read to the caller if we hit EOF.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) int *retry_op)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) struct ceph_osd_client *osdc = &fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) u64 off = iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) u64 len = iov_iter_count(to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) if (!len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) * flush any page cache pages in this range. this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) * will make concurrent normal and sync io slow,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) * but it will at least behave sensibly when they are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) * in sequence.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) ret = filemap_write_and_wait_range(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) off, off + len - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) while ((len = iov_iter_count(to)) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) struct page **pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) int num_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) size_t page_off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) u64 i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) bool more;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) int idx;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) size_t left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) req = ceph_osdc_new_request(osdc, &ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) ci->i_vino, off, &len, 0, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) NULL, ci->i_truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) ci->i_truncate_size, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) ret = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) more = len < iov_iter_count(to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) num_pages = calc_pages_for(off, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) page_off = off & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) if (IS_ERR(pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) ret = PTR_ERR(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) false, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) ret = ceph_osdc_start_request(osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) ret = ceph_osdc_wait_request(osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) ceph_update_read_latency(&fsc->mdsc->metric,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) req->r_end_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) dout("sync_read %llu~%llu got %zd i_size %llu%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) off, len, ret, i_size, (more ? " MORE" : ""));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (ret == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) if (ret >= 0 && ret < len && (off + ret < i_size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) int zlen = min(len - ret, i_size - off - ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) int zoff = page_off + ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) dout("sync_read zero gap %llu~%llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) off + ret, off + ret + zlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) ceph_zero_page_vector_range(zoff, zlen, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) ret += zlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) idx = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) left = ret > 0 ? ret : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) while (left > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) size_t len, copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) page_off = off & ~PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) len = min_t(size_t, left, PAGE_SIZE - page_off);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) SetPageUptodate(pages[idx]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) copied = copy_page_to_iter(pages[idx++],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) page_off, len, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) off += copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) left -= copied;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) if (copied < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) ceph_release_page_vector(pages, num_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) if (ret == -EBLOCKLISTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) if (off >= i_size || !more)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) if (off > iocb->ki_pos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) if (ret >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) iov_iter_count(to) > 0 && off >= i_size_read(inode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) *retry_op = CHECK_EOF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) ret = off - iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) iocb->ki_pos = off;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) dout("sync_read result %zd retry_op %d\n", ret, *retry_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) struct ceph_aio_request {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) struct kiocb *iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) size_t total_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) bool write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) bool should_dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) int error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) struct list_head osd_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) unsigned num_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) atomic_t pending_reqs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) struct timespec64 mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) struct ceph_cap_flush *prealloc_cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) struct ceph_aio_work {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) struct work_struct work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) static void ceph_aio_retry_work(struct work_struct *work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) static void ceph_aio_complete(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) struct ceph_aio_request *aio_req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) if (!atomic_dec_and_test(&aio_req->pending_reqs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) if (aio_req->iocb->ki_flags & IOCB_DIRECT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) inode_dio_end(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) ret = aio_req->error;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) ret = aio_req->total_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) dout("ceph_aio_complete %p rc %d\n", inode, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) if (ret >= 0 && aio_req->write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) int dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) loff_t endoff = aio_req->iocb->ki_pos + aio_req->total_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) if (endoff > i_size_read(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) if (ceph_inode_set_size(inode, endoff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) ci->i_inline_version = CEPH_INLINE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) &aio_req->prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) if (dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) __mark_inode_dirty(inode, dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) ceph_put_cap_refs(ci, (aio_req->write ? CEPH_CAP_FILE_WR :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) CEPH_CAP_FILE_RD));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) aio_req->iocb->ki_complete(aio_req->iocb, ret, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) ceph_free_cap_flush(aio_req->prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) kfree(aio_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) static void ceph_aio_complete_req(struct ceph_osd_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) int rc = req->r_result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) struct inode *inode = req->r_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) struct ceph_aio_request *aio_req = req->r_priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) struct ceph_client_metric *metric = &ceph_sb_to_mdsc(inode->i_sb)->metric;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_BVECS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) BUG_ON(!osd_data->num_bvecs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) dout("ceph_aio_complete_req %p rc %d bytes %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) inode, rc, osd_data->bvec_pos.iter.bi_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) /* r_start_latency == 0 means the request was not submitted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) if (req->r_start_latency) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) if (aio_req->write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) ceph_update_write_latency(metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) req->r_end_latency, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) ceph_update_read_latency(metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) req->r_end_latency, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) if (rc == -EOLDSNAPC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) struct ceph_aio_work *aio_work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) BUG_ON(!aio_req->write);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) aio_work = kmalloc(sizeof(*aio_work), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) if (aio_work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) INIT_WORK(&aio_work->work, ceph_aio_retry_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) aio_work->req = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) queue_work(ceph_inode_to_client(inode)->inode_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) &aio_work->work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) rc = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) } else if (!aio_req->write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) if (rc == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) rc = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) if (rc >= 0 && osd_data->bvec_pos.iter.bi_size > rc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) struct iov_iter i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) int zlen = osd_data->bvec_pos.iter.bi_size - rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) * If read is satisfied by single OSD request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) * it can pass EOF. Otherwise read is within
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) * i_size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) if (aio_req->num_reqs == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) loff_t i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) loff_t endoff = aio_req->iocb->ki_pos + rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) if (endoff < i_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) zlen = min_t(size_t, zlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) i_size - endoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) aio_req->total_len = rc + zlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) iov_iter_bvec(&i, READ, osd_data->bvec_pos.bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) osd_data->num_bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) osd_data->bvec_pos.iter.bi_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) iov_iter_advance(&i, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) iov_iter_zero(zlen, &i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) aio_req->should_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) if (rc < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) cmpxchg(&aio_req->error, 0, rc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) ceph_aio_complete(inode, aio_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) static void ceph_aio_retry_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) struct ceph_aio_work *aio_work =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) container_of(work, struct ceph_aio_work, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) struct ceph_osd_request *orig_req = aio_work->req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) struct ceph_aio_request *aio_req = orig_req->r_priv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) struct inode *inode = orig_req->r_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) if (__ceph_have_pending_cap_snap(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) struct ceph_cap_snap *capsnap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) list_last_entry(&ci->i_cap_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) struct ceph_cap_snap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) snapc = ceph_get_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) BUG_ON(!ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) snapc = ceph_get_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) req = ceph_osdc_alloc_request(orig_req->r_osdc, snapc, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) false, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) if (!req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) req = orig_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) req->r_flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) req->r_ops[0] = orig_req->r_ops[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) req->r_mtime = aio_req->mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) req->r_data_offset = req->r_ops[0].extent.offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) ret = ceph_osdc_alloc_messages(req, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) req = orig_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) ceph_osdc_put_request(orig_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) req->r_callback = ceph_aio_complete_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) req->r_priv = aio_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) ret = ceph_osdc_start_request(req->r_osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) req->r_result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) ceph_aio_complete_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) kfree(aio_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) static ssize_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) struct ceph_snap_context *snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) struct ceph_cap_flush **pcf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) struct ceph_client_metric *metric = &fsc->mdsc->metric;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) struct ceph_vino vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) struct bio_vec *bvecs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) struct ceph_aio_request *aio_req = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) int num_pages = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) int flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) struct timespec64 mtime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) size_t count = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) loff_t pos = iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) bool write = iov_iter_rw(iter) == WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) bool should_dirty = !write && iter_is_iovec(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) if (write && ceph_snap(file_inode(file)) != CEPH_NOSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) dout("sync_direct_%s on file %p %lld~%u snapc %p seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) (write ? "write" : "read"), file, pos, (unsigned)count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) snapc, snapc ? snapc->seq : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) if (write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) int ret2 = invalidate_inode_pages2_range(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) pos >> PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) (pos + count - 1) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) if (ret2 < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) dout("invalidate_inode_pages2_range returned %d\n", ret2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) flags = CEPH_OSD_FLAG_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) while (iov_iter_count(iter) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) u64 size = iov_iter_count(iter);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) ssize_t len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) size = min_t(u64, size, fsc->mount_options->wsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) size = min_t(u64, size, fsc->mount_options->rsize);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) vino = ceph_vino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) vino, pos, &size, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) write ? CEPH_OSD_OP_WRITE :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) CEPH_OSD_OP_READ,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) flags, snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) ci->i_truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) ci->i_truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) ret = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) len = iter_get_bvecs_alloc(iter, size, &bvecs, &num_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) if (len < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) ret = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) if (len != size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) osd_req_op_extent_update(req, 0, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) * To simplify error handling, allow AIO when IO within i_size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) * or IO can be satisfied by single OSD request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) if (pos == iocb->ki_pos && !is_sync_kiocb(iocb) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) (len == count || pos + count <= i_size_read(inode))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) aio_req = kzalloc(sizeof(*aio_req), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) if (aio_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) aio_req->iocb = iocb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) aio_req->write = write;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) aio_req->should_dirty = should_dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) INIT_LIST_HEAD(&aio_req->osd_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) if (write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) aio_req->mtime = mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) swap(aio_req->prealloc_cf, *pcf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) /* ignore error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) if (write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) * throw out any page cache pages in this range. this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) * may block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) truncate_inode_pages_range(inode->i_mapping, pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) PAGE_ALIGN(pos + len) - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) req->r_mtime = mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (aio_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) aio_req->total_len += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) aio_req->num_reqs++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) atomic_inc(&aio_req->pending_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) req->r_callback = ceph_aio_complete_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) req->r_priv = aio_req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) list_add_tail(&req->r_private_item, &aio_req->osd_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) pos += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) ret = ceph_osdc_start_request(req->r_osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) if (write)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) ceph_update_write_latency(metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) req->r_end_latency, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) ceph_update_read_latency(metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) req->r_end_latency, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) if (!write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) if (ret == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) if (ret >= 0 && ret < len && pos + ret < size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) struct iov_iter i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) int zlen = min_t(size_t, len - ret,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) size - pos - ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) iov_iter_bvec(&i, READ, bvecs, num_pages, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) iov_iter_advance(&i, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) iov_iter_zero(zlen, &i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) ret += zlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) if (ret >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) len = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) put_bvecs(bvecs, num_pages, should_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) pos += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) if (!write && pos >= size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) if (write && pos > size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) if (ceph_inode_set_size(inode, pos))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) ceph_check_caps(ceph_inode(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) CHECK_CAPS_AUTHONLY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) if (aio_req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) LIST_HEAD(osd_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) if (aio_req->num_reqs == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) kfree(aio_req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) ceph_get_cap_refs(ci, write ? CEPH_CAP_FILE_WR :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) CEPH_CAP_FILE_RD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) list_splice(&aio_req->osd_reqs, &osd_reqs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) inode_dio_begin(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) while (!list_empty(&osd_reqs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) req = list_first_entry(&osd_reqs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) struct ceph_osd_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) r_private_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) list_del_init(&req->r_private_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) if (ret >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) ret = ceph_osdc_start_request(req->r_osdc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) req->r_result = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) ceph_aio_complete_req(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) return -EIOCBQUEUED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) if (ret != -EOLDSNAPC && pos > iocb->ki_pos) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) ret = pos - iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) iocb->ki_pos = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) * Synchronous write, straight from __user pointer or user pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) * If write spans object boundary, just do multiple writes. (For a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) * correct atomic write, we should e.g. take write locks on all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * objects, rollback on failure, etc.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) static ssize_t
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) struct ceph_snap_context *snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) struct ceph_vino vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) struct page **pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) u64 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) int num_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) int written = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) int flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) bool check_caps = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) struct timespec64 mtime = current_time(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) size_t count = iov_iter_count(from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) dout("sync_write on file %p %lld~%u snapc %p seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) file, pos, (unsigned)count, snapc, snapc->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) ret = filemap_write_and_wait_range(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) pos, pos + count - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) ret = invalidate_inode_pages2_range(inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) pos >> PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) (pos + count - 1) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) dout("invalidate_inode_pages2_range returned %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) flags = /* CEPH_OSD_FLAG_ORDERSNAP | */ CEPH_OSD_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) while ((len = iov_iter_count(from)) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) size_t left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) int n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) vino = ceph_vino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) vino, pos, &len, 0, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) CEPH_OSD_OP_WRITE, flags, snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) ci->i_truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) ci->i_truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) ret = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) * write from beginning of first page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) * regardless of io alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) if (IS_ERR(pages)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) ret = PTR_ERR(pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) left = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) for (n = 0; n < num_pages; n++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) size_t plen = min_t(size_t, left, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) ret = copy_page_from_iter(pages[n], 0, plen, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) if (ret != plen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) left -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) ceph_release_page_vector(pages, num_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) req->r_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) false, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) req->r_mtime = mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) req->r_end_latency, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) if (ret != 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) ceph_set_error_write(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) ceph_clear_error_write(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) pos += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) written += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) if (pos > i_size_read(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) check_caps = ceph_inode_set_size(inode, pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) if (check_caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) ceph_check_caps(ceph_inode(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) CHECK_CAPS_AUTHONLY,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) if (ret != -EOLDSNAPC && written > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) ret = written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) iocb->ki_pos = pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) * Wrap generic_file_aio_read with checks for cap bits on the inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) * Atomically grab references, so that those bits are not released
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) * back to the MDS mid-read.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) * Hmm, the sync read case isn't actually async... should it be?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) struct file *filp = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) struct ceph_file_info *fi = filp->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) size_t len = iov_iter_count(to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) struct inode *inode = file_inode(filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) struct page *pinned_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) int want, got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) int retry_op = 0, read = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) dout("aio_read %p %llx.%llx %llu~%u trying to get caps on %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) if (direct_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) ceph_start_io_direct(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) ceph_start_io_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) if (fi->fmode & CEPH_FILE_MODE_LAZY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) want = CEPH_CAP_FILE_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) &got, &pinned_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) if (iocb->ki_flags & IOCB_DIRECT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) ceph_end_io_direct(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) ceph_end_io_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) (iocb->ki_flags & IOCB_DIRECT) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) (fi->flags & CEPH_F_SYNC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) ceph_cap_string(got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) if (ci->i_inline_version == CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) if (!retry_op && (iocb->ki_flags & IOCB_DIRECT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) ret = ceph_direct_read_write(iocb, to,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) if (ret >= 0 && ret < len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) retry_op = CHECK_EOF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) ret = ceph_sync_read(iocb, to, &retry_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) retry_op = READ_INLINE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) CEPH_DEFINE_RW_CONTEXT(rw_ctx, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) ceph_cap_string(got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) ceph_add_rw_context(fi, &rw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) ret = generic_file_read_iter(iocb, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) ceph_del_rw_context(fi, &rw_ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) if (pinned_page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) put_page(pinned_page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) pinned_page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) if (direct_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) ceph_end_io_direct(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) ceph_end_io_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) if (retry_op > HAVE_RETRIED && ret >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) int statret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) struct page *page = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) loff_t i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) if (retry_op == READ_INLINE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) page = __page_cache_alloc(GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) statret = __ceph_do_getattr(inode, page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) CEPH_STAT_CAP_INLINE_DATA, !!page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) if (statret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) __free_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) if (statret == -ENODATA) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) BUG_ON(retry_op != READ_INLINE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) return statret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) if (retry_op == READ_INLINE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) BUG_ON(ret > 0 || read > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) if (iocb->ki_pos < i_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) iocb->ki_pos < PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) loff_t end = min_t(loff_t, i_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) iocb->ki_pos + len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) end = min_t(loff_t, end, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) if (statret < end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) zero_user_segment(page, statret, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) ret = copy_page_to_iter(page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) iocb->ki_pos & ~PAGE_MASK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) end - iocb->ki_pos, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) iocb->ki_pos += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) read += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) if (iocb->ki_pos < i_size && read < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) size_t zlen = min_t(size_t, len - read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) i_size - iocb->ki_pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) ret = iov_iter_zero(zlen, to);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) iocb->ki_pos += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) read += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) __free_pages(page, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) return read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) /* hit EOF or hole? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) if (retry_op == CHECK_EOF && iocb->ki_pos < i_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) ret < len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) dout("sync_read hit hole, ppos %lld < size %lld"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) ", reading more\n", iocb->ki_pos, i_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) read += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) len -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) retry_op = HAVE_RETRIED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) if (ret >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) ret += read;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) * Take cap references to avoid releasing caps to MDS mid-write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) * If we are synchronous, and write with an old snap context, the OSD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) * may return EOLDSNAPC. In that case, retry the write.. _after_
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) * dropping our cap refs and allowing the pending snap to logically
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) * complete _before_ this write occurs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) * If we are near ENOSPC, write synchronously.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) struct file *file = iocb->ki_filp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) struct ceph_file_info *fi = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) struct ceph_osd_client *osdc = &fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) struct ceph_cap_flush *prealloc_cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) ssize_t count, written = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) int err, want, got;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) bool direct_lock = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) u32 map_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) u64 pool_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) loff_t pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) loff_t limit = max(i_size_read(inode), fsc->max_file_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) if (ceph_snap(inode) != CEPH_NOSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) prealloc_cf = ceph_alloc_cap_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) if (!prealloc_cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) if ((iocb->ki_flags & (IOCB_DIRECT | IOCB_APPEND)) == IOCB_DIRECT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) direct_lock = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) retry_snap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) if (direct_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) ceph_start_io_direct(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) ceph_start_io_write(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) /* We can write back this queue in page reclaim */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) current->backing_dev_info = inode_to_bdi(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) if (iocb->ki_flags & IOCB_APPEND) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) err = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) err = generic_write_checks(iocb, from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) if (err <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) pos = iocb->ki_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) if (unlikely(pos >= limit)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) err = -EFBIG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) iov_iter_truncate(from, limit - pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) count = iov_iter_count(from);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) if (ceph_quota_is_max_bytes_exceeded(inode, pos + count)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) err = -EDQUOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) down_read(&osdc->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) map_flags = osdc->osdmap->flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) pool_flags = ceph_pg_pool_flags(osdc->osdmap, ci->i_layout.pool_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) up_read(&osdc->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) if ((map_flags & CEPH_OSDMAP_FULL) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) (pool_flags & CEPH_POOL_FLAG_FULL)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) err = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) err = file_remove_privs(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) if (ci->i_inline_version != CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) err = ceph_uninline_data(file, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) dout("aio_write %p %llx.%llx %llu~%zd getting caps. i_size %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) inode, ceph_vinop(inode), pos, count, i_size_read(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) if (fi->fmode & CEPH_FILE_MODE_LAZY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) want = CEPH_CAP_FILE_BUFFER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) &got, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) err = file_update_time(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) goto out_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) inode_inc_iversion_raw(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) dout("aio_write %p %llx.%llx %llu~%zd got cap refs on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) inode, ceph_vinop(inode), pos, count, ceph_cap_string(got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) (iocb->ki_flags & IOCB_DIRECT) || (fi->flags & CEPH_F_SYNC) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) (ci->i_ceph_flags & CEPH_I_ERROR_WRITE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) struct ceph_snap_context *snapc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) struct iov_iter data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) if (__ceph_have_pending_cap_snap(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) struct ceph_cap_snap *capsnap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) list_last_entry(&ci->i_cap_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) struct ceph_cap_snap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) snapc = ceph_get_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) BUG_ON(!ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) snapc = ceph_get_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) /* we might need to revert back to that point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) data = *from;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) if (iocb->ki_flags & IOCB_DIRECT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) written = ceph_direct_read_write(iocb, &data, snapc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) &prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) written = ceph_sync_write(iocb, &data, pos, snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) if (direct_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) ceph_end_io_direct(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) ceph_end_io_write(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) if (written > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) iov_iter_advance(from, written);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) ceph_put_snap_context(snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) * No need to acquire the i_truncate_mutex. Because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) * the MDS revokes Fwb caps before sending truncate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * message to us. We can't get Fwb cap while there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * are pending vmtruncate. So write and vmtruncate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) * can not run at the same time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) written = generic_perform_write(file, from, pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) if (likely(written >= 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) iocb->ki_pos = pos + written;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) ceph_end_io_write(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) if (written >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) int dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) ci->i_inline_version = CEPH_INLINE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) &prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) if (dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) __mark_inode_dirty(inode, dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) if (ceph_quota_is_max_bytes_approaching(inode, iocb->ki_pos))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) ceph_check_caps(ci, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) inode, ceph_vinop(inode), pos, (unsigned)count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) ceph_cap_string(got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) if (written == -EOLDSNAPC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) inode, ceph_vinop(inode), pos, (unsigned)count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) goto retry_snap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) if (written >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) if ((map_flags & CEPH_OSDMAP_NEARFULL) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) (pool_flags & CEPH_POOL_FLAG_NEARFULL))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) iocb->ki_flags |= IOCB_DSYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) written = generic_write_sync(iocb, written);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) goto out_unlocked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) out_caps:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) if (direct_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) ceph_end_io_direct(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) ceph_end_io_write(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) out_unlocked:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) ceph_free_cap_flush(prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) current->backing_dev_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) return written ? written : err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) * llseek. be sure to verify file size on SEEK_END.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) static loff_t ceph_llseek(struct file *file, loff_t offset, int whence)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) struct inode *inode = file->f_mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) loff_t i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) loff_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) inode_lock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) if (whence == SEEK_END || whence == SEEK_DATA || whence == SEEK_HOLE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) ret = ceph_do_getattr(inode, CEPH_STAT_CAP_SIZE, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) i_size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) switch (whence) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) case SEEK_END:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) offset += i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) case SEEK_CUR:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) * Here we special-case the lseek(fd, 0, SEEK_CUR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) * position-querying operation. Avoid rewriting the "same"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) * f_pos value back to the file because a concurrent read(),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) * write() or lseek() might have altered it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) if (offset == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) ret = file->f_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) offset += file->f_pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) case SEEK_DATA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) if (offset < 0 || offset >= i_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) ret = -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) case SEEK_HOLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) if (offset < 0 || offset >= i_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) ret = -ENXIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) offset = i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) ret = vfs_setpos(file, offset, max(i_size, fsc->max_file_size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) inode_unlock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) static inline void ceph_zero_partial_page(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) struct inode *inode, loff_t offset, unsigned size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) pgoff_t index = offset >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) page = find_lock_page(inode->i_mapping, index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) wait_on_page_writeback(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) zero_user(page, offset & (PAGE_SIZE - 1), size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) unlock_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) loff_t length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) loff_t nearly = round_up(offset, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) if (offset < nearly) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) loff_t size = nearly - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) if (length < size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) size = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) ceph_zero_partial_page(inode, offset, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) offset += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) length -= size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) if (length >= PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) loff_t size = round_down(length, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) truncate_pagecache_range(inode, offset, offset + size - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) offset += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) length -= size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) if (length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) ceph_zero_partial_page(inode, offset, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) static int ceph_zero_partial_object(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) loff_t offset, loff_t *length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) struct ceph_osd_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) loff_t zero = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) int op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) if (!length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) op = offset ? CEPH_OSD_OP_DELETE : CEPH_OSD_OP_TRUNCATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) length = &zero;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) op = CEPH_OSD_OP_ZERO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) ceph_vino(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) offset, length,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 0, 1, op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) CEPH_OSD_FLAG_WRITE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) NULL, 0, 0, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) if (IS_ERR(req)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) ret = PTR_ERR(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) req->r_mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) ret = ceph_osdc_start_request(&fsc->client->osdc, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) if (ret == -ENOENT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) ceph_osdc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) static int ceph_zero_objects(struct inode *inode, loff_t offset, loff_t length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) s32 stripe_unit = ci->i_layout.stripe_unit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) s32 stripe_count = ci->i_layout.stripe_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) s32 object_size = ci->i_layout.object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) u64 object_set_size = object_size * stripe_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) u64 nearly, t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) /* round offset up to next period boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) nearly = offset + object_set_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) t = nearly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) nearly -= do_div(t, object_set_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) while (length && offset < nearly) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) loff_t size = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) ret = ceph_zero_partial_object(inode, offset, &size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) offset += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) length -= size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) while (length >= object_set_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) loff_t pos = offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) for (i = 0; i < stripe_count; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) ret = ceph_zero_partial_object(inode, pos, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) pos += stripe_unit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) offset += object_set_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) length -= object_set_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) while (length) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) loff_t size = length;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) ret = ceph_zero_partial_object(inode, offset, &size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) offset += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) length -= size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) static long ceph_fallocate(struct file *file, int mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) loff_t offset, loff_t length)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) struct ceph_file_info *fi = file->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) struct inode *inode = file_inode(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) struct ceph_cap_flush *prealloc_cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) int want, got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) int dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) loff_t endoff = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) loff_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) if (mode != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) if (!S_ISREG(inode->i_mode))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) prealloc_cf = ceph_alloc_cap_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) if (!prealloc_cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) inode_lock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) if (ceph_snap(inode) != CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) ret = -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) if (ci->i_inline_version != CEPH_INLINE_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) ret = ceph_uninline_data(file, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) size = i_size_read(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) /* Are we punching a hole beyond EOF? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) if (offset >= size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) if ((offset + length) > size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) length = size - offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) if (fi->fmode & CEPH_FILE_MODE_LAZY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) want = CEPH_CAP_FILE_BUFFER | CEPH_CAP_FILE_LAZYIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) want = CEPH_CAP_FILE_BUFFER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) ceph_zero_pagecache_range(inode, offset, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) ret = ceph_zero_objects(inode, offset, length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) ci->i_inline_version = CEPH_INLINE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) &prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) if (dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) __mark_inode_dirty(inode, dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) ceph_put_cap_refs(ci, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) inode_unlock(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) ceph_free_cap_flush(prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) * This function tries to get FILE_WR capabilities for dst_ci and FILE_RD for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) * src_ci. Two attempts are made to obtain both caps, and an error is return if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) * this fails; zero is returned on success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) static int get_rd_wr_caps(struct file *src_filp, int *src_got,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) struct file *dst_filp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) loff_t dst_endoff, int *dst_got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) bool retrying = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) retry_caps:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) ret = ceph_get_caps(dst_filp, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) dst_endoff, dst_got, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) * Since we're already holding the FILE_WR capability for the dst file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) * we would risk a deadlock by using ceph_get_caps. Thus, we'll do some
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) * retry dance instead to try to get both capabilities.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) ret = ceph_try_get_caps(file_inode(src_filp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) CEPH_CAP_FILE_RD, CEPH_CAP_FILE_SHARED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) false, src_got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) if (ret <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) /* Start by dropping dst_ci caps and getting src_ci caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) ceph_put_cap_refs(ceph_inode(file_inode(dst_filp)), *dst_got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) if (retrying) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) /* ceph_try_get_caps masks EAGAIN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) ret = ceph_get_caps(src_filp, CEPH_CAP_FILE_RD,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) CEPH_CAP_FILE_SHARED, -1, src_got, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) /*... drop src_ci caps too, and retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) ceph_put_cap_refs(ceph_inode(file_inode(src_filp)), *src_got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) retrying = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) goto retry_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181)
/*
 * Drop the cap references in @src_got on @src_ci and those in @dst_got
 * on @dst_ci, source first.
 */
static void put_rd_wr_caps(struct ceph_inode_info *src_ci, int src_got,
			   struct ceph_inode_info *dst_ci, int dst_got)
{
	ceph_put_cap_refs(src_ci, src_got);
	ceph_put_cap_refs(dst_ci, dst_got);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) * This function does several size-related checks, returning an error if:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) * - source file is smaller than off+len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) * - destination file size is not OK (inode_newsize_ok())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) * - max bytes quotas is exceeded
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) static int is_file_size_ok(struct inode *src_inode, struct inode *dst_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) loff_t src_off, loff_t dst_off, size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) loff_t size, endoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) size = i_size_read(src_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) * Don't copy beyond source file EOF. Instead of simply setting length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) * to (size - src_off), just drop to VFS default implementation, as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) * local i_size may be stale due to other clients writing to the source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) * inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) if (src_off + len > size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) dout("Copy beyond EOF (%llu + %zu > %llu)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) src_off, len, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) size = i_size_read(dst_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) endoff = dst_off + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) if (inode_newsize_ok(dst_inode, endoff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) if (ceph_quota_is_max_bytes_exceeded(dst_inode, endoff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) return -EDQUOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) static ssize_t ceph_do_objects_copy(struct ceph_inode_info *src_ci, u64 *src_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) struct ceph_inode_info *dst_ci, u64 *dst_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) struct ceph_fs_client *fsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) size_t len, unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) struct ceph_object_locator src_oloc, dst_oloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) struct ceph_object_id src_oid, dst_oid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) size_t bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) u64 src_objnum, src_objoff, dst_objnum, dst_objoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) u32 src_objlen, dst_objlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) u32 object_size = src_ci->i_layout.object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) src_oloc.pool = src_ci->i_layout.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) src_oloc.pool_ns = ceph_try_get_string(src_ci->i_layout.pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) dst_oloc.pool = dst_ci->i_layout.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) dst_oloc.pool_ns = ceph_try_get_string(dst_ci->i_layout.pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) while (len >= object_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) ceph_calc_file_object_mapping(&src_ci->i_layout, *src_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) object_size, &src_objnum,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) &src_objoff, &src_objlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) ceph_calc_file_object_mapping(&dst_ci->i_layout, *dst_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) object_size, &dst_objnum,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) &dst_objoff, &dst_objlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) ceph_oid_init(&src_oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) ceph_oid_printf(&src_oid, "%llx.%08llx",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) src_ci->i_vino.ino, src_objnum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) ceph_oid_init(&dst_oid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) ceph_oid_printf(&dst_oid, "%llx.%08llx",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) dst_ci->i_vino.ino, dst_objnum);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) /* Do an object remote copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) ret = ceph_osdc_copy_from(&fsc->client->osdc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) src_ci->i_vino.snap, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) &src_oid, &src_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) CEPH_OSD_OP_FLAG_FADVISE_NOCACHE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) &dst_oid, &dst_oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) CEPH_OSD_OP_FLAG_FADVISE_DONTNEED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) dst_ci->i_truncate_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) dst_ci->i_truncate_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) CEPH_OSD_COPY_FROM_FLAG_TRUNCATE_SEQ);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) if (ret == -EOPNOTSUPP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) fsc->have_copy_from2 = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) pr_notice("OSDs don't support copy-from2; disabling copy offload\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) dout("ceph_osdc_copy_from returned %d\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) if (!bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) bytes = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) len -= object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) bytes += object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) *src_off += object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) *dst_off += object_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) ceph_oloc_destroy(&src_oloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) ceph_oloc_destroy(&dst_oloc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) return bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) struct file *dst_file, loff_t dst_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) size_t len, unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) struct inode *src_inode = file_inode(src_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) struct inode *dst_inode = file_inode(dst_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) struct ceph_inode_info *src_ci = ceph_inode(src_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) struct ceph_inode_info *dst_ci = ceph_inode(dst_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) struct ceph_cap_flush *prealloc_cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) struct ceph_fs_client *src_fsc = ceph_inode_to_client(src_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) loff_t size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) ssize_t ret = -EIO, bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) u64 src_objnum, dst_objnum, src_objoff, dst_objoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) u32 src_objlen, dst_objlen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) int src_got = 0, dst_got = 0, err, dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) if (src_inode->i_sb != dst_inode->i_sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) struct ceph_fs_client *dst_fsc = ceph_inode_to_client(dst_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) if (ceph_fsid_compare(&src_fsc->client->fsid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) &dst_fsc->client->fsid)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) dout("Copying files across clusters: src: %pU dst: %pU\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) &src_fsc->client->fsid, &dst_fsc->client->fsid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) return -EXDEV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (ceph_snap(dst_inode) != CEPH_NOSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) return -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) * Some of the checks below will return -EOPNOTSUPP, which will force a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) * fallback to the default VFS copy_file_range implementation. This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) * desirable in several cases (for ex, the 'len' is smaller than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) * size of the objects, or in cases where that would be more
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) * efficient).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) if (ceph_test_mount_opt(src_fsc, NOCOPYFROM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) if (!src_fsc->have_copy_from2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) * Striped file layouts require that we copy partial objects, but the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) * OSD copy-from operation only supports full-object copies. Limit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) * this to non-striped file layouts for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) if ((src_ci->i_layout.stripe_unit != dst_ci->i_layout.stripe_unit) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) (src_ci->i_layout.stripe_count != 1) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) (dst_ci->i_layout.stripe_count != 1) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) (src_ci->i_layout.object_size != dst_ci->i_layout.object_size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) dout("Invalid src/dst files layout\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) if (len < src_ci->i_layout.object_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) return -EOPNOTSUPP; /* no remote copy will be done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) prealloc_cf = ceph_alloc_cap_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) if (!prealloc_cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) /* Start by sync'ing the source and destination files */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) ret = file_write_and_wait_range(src_file, src_off, (src_off + len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) dout("failed to write src file (%zd)\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) ret = file_write_and_wait_range(dst_file, dst_off, (dst_off + len));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) dout("failed to write dst file (%zd)\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) * We need FILE_WR caps for dst_ci and FILE_RD for src_ci as other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) * clients may have dirty data in their caches. And OSDs know nothing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) * about caps, so they can't safely do the remote object copies.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) err = get_rd_wr_caps(src_file, &src_got,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) dst_file, (dst_off + len), &dst_got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) dout("get_rd_wr_caps returned %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) ret = is_file_size_ok(src_inode, dst_inode, src_off, dst_off, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) goto out_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) /* Drop dst file cached pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) ret = invalidate_inode_pages2_range(dst_inode->i_mapping,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) dst_off >> PAGE_SHIFT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) (dst_off + len) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) dout("Failed to invalidate inode pages (%zd)\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) ret = 0; /* XXX */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) ceph_calc_file_object_mapping(&src_ci->i_layout, src_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) src_ci->i_layout.object_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) &src_objnum, &src_objoff, &src_objlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) ceph_calc_file_object_mapping(&dst_ci->i_layout, dst_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) dst_ci->i_layout.object_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) &dst_objnum, &dst_objoff, &dst_objlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) /* object-level offsets need to the same */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) if (src_objoff != dst_objoff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) ret = -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) goto out_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) * Do a manual copy if the object offset isn't object aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) * 'src_objlen' contains the bytes left until the end of the object,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) * starting at the src_off
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) if (src_objoff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) dout("Initial partial copy of %u bytes\n", src_objlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) * we need to temporarily drop all caps as we'll be calling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) * {read,write}_iter, which will get caps again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) ret = do_splice_direct(src_file, &src_off, dst_file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) &dst_off, src_objlen, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) /* Abort on short copies or on error */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) if (ret < src_objlen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) dout("Failed partial copy (%zd)\n", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) len -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) err = get_rd_wr_caps(src_file, &src_got,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) dst_file, (dst_off + len), &dst_got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) err = is_file_size_ok(src_inode, dst_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) src_off, dst_off, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) goto out_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) size = i_size_read(dst_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) bytes = ceph_do_objects_copy(src_ci, &src_off, dst_ci, &dst_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) src_fsc, len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) if (bytes <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) ret = bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) goto out_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) dout("Copied %zu bytes out of %zu\n", bytes, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) len -= bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) ret += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) file_update_time(dst_file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) inode_inc_iversion_raw(dst_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) if (dst_off > size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) /* Let the MDS know about dst file size change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) if (ceph_inode_set_size(dst_inode, dst_off) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) ceph_quota_is_max_bytes_approaching(dst_inode, dst_off))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) ceph_check_caps(dst_ci, CHECK_CAPS_AUTHONLY, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) /* Mark Fw dirty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) spin_lock(&dst_ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) dst_ci->i_inline_version = CEPH_INLINE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) dirty = __ceph_mark_dirty_caps(dst_ci, CEPH_CAP_FILE_WR, &prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) spin_unlock(&dst_ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) if (dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) __mark_inode_dirty(dst_inode, dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) out_caps:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) * Do the final manual copy if we still have some bytes left, unless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) * there were errors in remote object copies (len >= object_size).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) if (len && (len < src_ci->i_layout.object_size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) dout("Final partial copy of %zu bytes\n", len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) bytes = do_splice_direct(src_file, &src_off, dst_file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) &dst_off, len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) if (bytes > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) ret += bytes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) dout("Failed partial copy (%zd)\n", bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) ceph_free_cap_flush(prealloc_cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) struct file *dst_file, loff_t dst_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) size_t len, unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) ssize_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) ret = __ceph_copy_file_range(src_file, src_off, dst_file, dst_off,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) if (ret == -EOPNOTSUPP || ret == -EXDEV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) ret = generic_copy_file_range(src_file, src_off, dst_file,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) dst_off, len, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) const struct file_operations ceph_file_fops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) .open = ceph_open,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) .release = ceph_release,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) .llseek = ceph_llseek,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) .read_iter = ceph_read_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) .write_iter = ceph_write_iter,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) .mmap = ceph_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) .fsync = ceph_fsync,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) .lock = ceph_lock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) .setlease = simple_nosetlease,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) .flock = ceph_flock,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) .splice_read = generic_file_splice_read,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) .splice_write = iter_file_splice_write,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) .unlocked_ioctl = ceph_ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) .compat_ioctl = compat_ptr_ioctl,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) .fallocate = ceph_fallocate,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) .copy_file_range = ceph_copy_file_range,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) };