// SPDX-License-Identifier: GPL-2.0
#include <linux/ceph/ceph_debug.h>

#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/writeback.h>
#include <linux/iversion.h>

#include "super.h"
#include "mds_client.h"
#include "cache.h"
#include <linux/ceph/decode.h>
#include <linux/ceph/messenger.h>

/*
 * Capability management
 *
 * The Ceph metadata servers control client access to inode metadata
 * and file data by issuing capabilities, granting clients permission
 * to read and/or write both inode fields and file data to OSDs
 * (storage nodes).  Each capability consists of a set of bits
 * indicating which operations are allowed.
 *
 * If the client holds a *_SHARED cap, the client has a coherent value
 * that can be safely read from the cached inode.
 *
 * In the case of a *_EXCL (exclusive) or FILE_WR capability, the
 * client is allowed to change inode attributes (e.g., file size,
 * mtime), note its dirty state in the ceph_cap, and asynchronously
 * flush that metadata change to the MDS.
 *
 * In the event of a conflicting operation (perhaps by another
 * client), the MDS will revoke the conflicting clients' capabilities.
 *
 * In order for a client to cache an inode, it must hold a capability
 * from at least one MDS server.  When inodes are released, release
 * notifications are batched and periodically sent en masse to the MDS
 * cluster to release server state.
 */

static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc);
static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
				 struct ceph_mds_session *session,
				 struct ceph_inode_info *ci,
				 u64 oldest_flush_tid);

/*
 * Generate readable cap strings for debugging output.
 */
#define MAX_CAP_STR 20
static char cap_str[MAX_CAP_STR][40];
static DEFINE_SPINLOCK(cap_str_lock);
static int last_cap_str;

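/*
 * Append one character to @s for each granular cap bit set in @c and
 * return the new end of the string (the caller NUL-terminates).
 */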
static char *gcap_string(char *s, int c)
{
	if (c & CEPH_CAP_GSHARED)
		*s++ = 's';
	if (c & CEPH_CAP_GEXCL)
		*s++ = 'x';
	if (c & CEPH_CAP_GCACHE)
		*s++ = 'c';
	if (c & CEPH_CAP_GRD)
		*s++ = 'r';
	if (c & CEPH_CAP_GWR)
		*s++ = 'w';
	if (c & CEPH_CAP_GBUFFER)
		*s++ = 'b';
	if (c & CEPH_CAP_GWREXTEND)
		*s++ = 'a';
	if (c & CEPH_CAP_GLAZYIO)
		*s++ = 'l';
	return s;
}

const char *ceph_cap_string(int caps)
{
	int i;
	char *s;
	int c;

	spin_lock(&cap_str_lock);
	i = last_cap_str++;
	if (last_cap_str == MAX_CAP_STR)
		last_cap_str = 0;
	spin_unlock(&cap_str_lock);

	s = cap_str[i];

	if (caps & CEPH_CAP_PIN)
		*s++ = 'p';

	c = (caps >> CEPH_CAP_SAUTH) & 3;
	if (c) {
		*s++ = 'A';
		s = gcap_string(s, c);
	}

	c = (caps >> CEPH_CAP_SLINK) & 3;
	if (c) {
		*s++ = 'L';
		s = gcap_string(s, c);
	}

	c = (caps >> CEPH_CAP_SXATTR) & 3;
	if (c) {
		*s++ = 'X';
		s = gcap_string(s, c);
	}

	c = caps >> CEPH_CAP_SFILE;
	if (c) {
		*s++ = 'F';
		s = gcap_string(s, c);
	}

	if (s == cap_str[i])
		*s++ = '-';
	*s = 0;
	return cap_str[i];
}

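/*
 * Set up the per-mdsc preallocated cap pool and its lock.
 */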
void ceph_caps_init(struct ceph_mds_client *mdsc)
{
	INIT_LIST_HEAD(&mdsc->caps_list);
	spin_lock_init(&mdsc->caps_list_lock);
}

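/*
 * Free any caps still sitting in the preallocated pool and reset the
 * pool counters.
 */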
void ceph_caps_finalize(struct ceph_mds_client *mdsc)
{
	struct ceph_cap *cap;

	spin_lock(&mdsc->caps_list_lock);
	while (!list_empty(&mdsc->caps_list)) {
		cap = list_first_entry(&mdsc->caps_list,
				       struct ceph_cap, caps_item);
		list_del(&cap->caps_item);
		kmem_cache_free(ceph_cap_cachep, cap);
	}
	mdsc->caps_total_count = 0;
	mdsc->caps_avail_count = 0;
	mdsc->caps_use_count = 0;
	mdsc->caps_reserve_count = 0;
	mdsc->caps_min_count = 0;
	spin_unlock(&mdsc->caps_list_lock);
}

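/*
 * Recompute the minimum number of preallocated caps and the maximum
 * number of in-use caps from the current mount options.
 */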
void ceph_adjust_caps_max_min(struct ceph_mds_client *mdsc,
			      struct ceph_mount_options *fsopt)
{
	spin_lock(&mdsc->caps_list_lock);
	mdsc->caps_min_count = fsopt->max_readdir;
	if (mdsc->caps_min_count < 1024)
		mdsc->caps_min_count = 1024;
	mdsc->caps_use_max = fsopt->caps_max;
	if (mdsc->caps_use_max > 0 &&
	    mdsc->caps_use_max < mdsc->caps_min_count)
		mdsc->caps_use_max = mdsc->caps_min_count;
	spin_unlock(&mdsc->caps_list_lock);
}

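/*
 * Return @nr_caps reserved caps to the pool, freeing any surplus
 * beyond the reserve + minimum watermark.
 *
 * Caller must hold mdsc->caps_list_lock.
 */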
static void __ceph_unreserve_caps(struct ceph_mds_client *mdsc, int nr_caps)
{
	struct ceph_cap *cap;
	int i;

	if (nr_caps) {
		BUG_ON(mdsc->caps_reserve_count < nr_caps);
		mdsc->caps_reserve_count -= nr_caps;
		if (mdsc->caps_avail_count >=
		    mdsc->caps_reserve_count + mdsc->caps_min_count) {
			mdsc->caps_total_count -= nr_caps;
			for (i = 0; i < nr_caps; i++) {
				cap = list_first_entry(&mdsc->caps_list,
					struct ceph_cap, caps_item);
				list_del(&cap->caps_item);
				kmem_cache_free(ceph_cap_cachep, cap);
			}
		} else {
			mdsc->caps_avail_count += nr_caps;
		}

		dout("%s: caps %d = %d used + %d resv + %d avail\n",
		     __func__,
		     mdsc->caps_total_count, mdsc->caps_use_count,
		     mdsc->caps_reserve_count, mdsc->caps_avail_count);
		BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
						 mdsc->caps_reserve_count +
						 mdsc->caps_avail_count);
	}
}

/*
 * Called under mdsc->mutex.
 */
int ceph_reserve_caps(struct ceph_mds_client *mdsc,
		      struct ceph_cap_reservation *ctx, int need)
{
	int i, j;
	struct ceph_cap *cap;
	int have;
	int alloc = 0;
	int max_caps;
	int err = 0;
	bool trimmed = false;
	struct ceph_mds_session *s;
	LIST_HEAD(newcaps);

	dout("reserve caps ctx=%p need=%d\n", ctx, need);

	/* first reserve any caps that are already allocated */
	spin_lock(&mdsc->caps_list_lock);
	if (mdsc->caps_avail_count >= need)
		have = need;
	else
		have = mdsc->caps_avail_count;
	mdsc->caps_avail_count -= have;
	mdsc->caps_reserve_count += have;
	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
					 mdsc->caps_reserve_count +
					 mdsc->caps_avail_count);
	spin_unlock(&mdsc->caps_list_lock);

	for (i = have; i < need; ) {
		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
		if (cap) {
			list_add(&cap->caps_item, &newcaps);
			alloc++;
			i++;
			continue;
		}

		if (!trimmed) {
			for (j = 0; j < mdsc->max_sessions; j++) {
				s = __ceph_lookup_mds_session(mdsc, j);
				if (!s)
					continue;
				mutex_unlock(&mdsc->mutex);

				mutex_lock(&s->s_mutex);
				max_caps = s->s_nr_caps - (need - i);
				ceph_trim_caps(mdsc, s, max_caps);
				mutex_unlock(&s->s_mutex);

				ceph_put_mds_session(s);
				mutex_lock(&mdsc->mutex);
			}
			trimmed = true;

			spin_lock(&mdsc->caps_list_lock);
			if (mdsc->caps_avail_count) {
				int more_have;
				if (mdsc->caps_avail_count >= need - i)
					more_have = need - i;
				else
					more_have = mdsc->caps_avail_count;

				i += more_have;
				have += more_have;
				mdsc->caps_avail_count -= more_have;
				mdsc->caps_reserve_count += more_have;
			}
			spin_unlock(&mdsc->caps_list_lock);

			continue;
		}

		pr_warn("reserve caps ctx=%p ENOMEM need=%d got=%d\n",
			ctx, need, have + alloc);
		err = -ENOMEM;
		break;
	}

	if (!err) {
		BUG_ON(have + alloc != need);
		ctx->count = need;
		ctx->used = 0;
	}

	spin_lock(&mdsc->caps_list_lock);
	mdsc->caps_total_count += alloc;
	mdsc->caps_reserve_count += alloc;
	list_splice(&newcaps, &mdsc->caps_list);

	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
					 mdsc->caps_reserve_count +
					 mdsc->caps_avail_count);

	if (err)
		__ceph_unreserve_caps(mdsc, have + alloc);

	spin_unlock(&mdsc->caps_list_lock);

	dout("reserve caps ctx=%p %d = %d used + %d resv + %d avail\n",
	     ctx, mdsc->caps_total_count, mdsc->caps_use_count,
	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
	return err;
}

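/*
 * Return all caps still held by @ctx to the pool and trigger cap
 * reclaim if we have gone over the configured caps_use_max.
 */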
void ceph_unreserve_caps(struct ceph_mds_client *mdsc,
			 struct ceph_cap_reservation *ctx)
{
	bool reclaim = false;

	if (!ctx->count)
		return;

	dout("unreserve caps ctx=%p count=%d\n", ctx, ctx->count);
	spin_lock(&mdsc->caps_list_lock);
	__ceph_unreserve_caps(mdsc, ctx->count);
	ctx->count = 0;

	if (mdsc->caps_use_max > 0 &&
	    mdsc->caps_use_count > mdsc->caps_use_max)
		reclaim = true;
	spin_unlock(&mdsc->caps_list_lock);

	if (reclaim)
		ceph_reclaim_caps_nr(mdsc, ctx->used);
}

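/*
 * Take a cap from the given reservation, or allocate/draw one directly
 * from the pool when no reservation context is provided.
 */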
struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc,
			      struct ceph_cap_reservation *ctx)
{
	struct ceph_cap *cap = NULL;

	/* temporary, until we do something about cap import/export */
	if (!ctx) {
		cap = kmem_cache_alloc(ceph_cap_cachep, GFP_NOFS);
		if (cap) {
			spin_lock(&mdsc->caps_list_lock);
			mdsc->caps_use_count++;
			mdsc->caps_total_count++;
			spin_unlock(&mdsc->caps_list_lock);
		} else {
			spin_lock(&mdsc->caps_list_lock);
			if (mdsc->caps_avail_count) {
				BUG_ON(list_empty(&mdsc->caps_list));

				mdsc->caps_avail_count--;
				mdsc->caps_use_count++;
				cap = list_first_entry(&mdsc->caps_list,
						struct ceph_cap, caps_item);
				list_del(&cap->caps_item);

				BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
				       mdsc->caps_reserve_count + mdsc->caps_avail_count);
			}
			spin_unlock(&mdsc->caps_list_lock);
		}

		return cap;
	}

	spin_lock(&mdsc->caps_list_lock);
	dout("get_cap ctx=%p (%d) %d = %d used + %d resv + %d avail\n",
	     ctx, ctx->count, mdsc->caps_total_count, mdsc->caps_use_count,
	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
	BUG_ON(!ctx->count);
	BUG_ON(ctx->count > mdsc->caps_reserve_count);
	BUG_ON(list_empty(&mdsc->caps_list));

	ctx->count--;
	ctx->used++;
	mdsc->caps_reserve_count--;
	mdsc->caps_use_count++;

	cap = list_first_entry(&mdsc->caps_list, struct ceph_cap, caps_item);
	list_del(&cap->caps_item);

	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
	       mdsc->caps_reserve_count + mdsc->caps_avail_count);
	spin_unlock(&mdsc->caps_list_lock);
	return cap;
}

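/*
 * Return a cap to the pool, or free it outright if we already hold
 * enough preallocated caps to cover the reserve + minimum counts.
 */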
void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap)
{
	spin_lock(&mdsc->caps_list_lock);
	dout("put_cap %p %d = %d used + %d resv + %d avail\n",
	     cap, mdsc->caps_total_count, mdsc->caps_use_count,
	     mdsc->caps_reserve_count, mdsc->caps_avail_count);
	mdsc->caps_use_count--;
	/*
	 * Keep some preallocated caps around (caps_min_count), to
	 * avoid lots of free/alloc churn.
	 */
	if (mdsc->caps_avail_count >= mdsc->caps_reserve_count +
				      mdsc->caps_min_count) {
		mdsc->caps_total_count--;
		kmem_cache_free(ceph_cap_cachep, cap);
	} else {
		mdsc->caps_avail_count++;
		list_add(&cap->caps_item, &mdsc->caps_list);
	}

	BUG_ON(mdsc->caps_total_count != mdsc->caps_use_count +
	       mdsc->caps_reserve_count + mdsc->caps_avail_count);
	spin_unlock(&mdsc->caps_list_lock);
}

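/*
 * Report the current cap pool counters; any of the output pointers
 * may be NULL if the caller is not interested in that value.
 */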
void ceph_reservation_status(struct ceph_fs_client *fsc,
			     int *total, int *avail, int *used, int *reserved,
			     int *min)
{
	struct ceph_mds_client *mdsc = fsc->mdsc;

	spin_lock(&mdsc->caps_list_lock);

	if (total)
		*total = mdsc->caps_total_count;
	if (avail)
		*avail = mdsc->caps_avail_count;
	if (used)
		*used = mdsc->caps_use_count;
	if (reserved)
		*reserved = mdsc->caps_reserve_count;
	if (min)
		*min = mdsc->caps_min_count;

	spin_unlock(&mdsc->caps_list_lock);
}

/*
 * Find ceph_cap for given mds, if any.
 *
 * Called with i_ceph_lock held.
 */
static struct ceph_cap *__get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{
	struct ceph_cap *cap;
	struct rb_node *n = ci->i_caps.rb_node;

	while (n) {
		cap = rb_entry(n, struct ceph_cap, ci_node);
		if (mds < cap->mds)
			n = n->rb_left;
		else if (mds > cap->mds)
			n = n->rb_right;
		else
			return cap;
	}
	return NULL;
}

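/*
 * Like __get_cap_for_mds(), but takes and drops i_ceph_lock itself.
 */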
struct ceph_cap *ceph_get_cap_for_mds(struct ceph_inode_info *ci, int mds)
{
	struct ceph_cap *cap;

	spin_lock(&ci->i_ceph_lock);
	cap = __get_cap_for_mds(ci, mds);
	spin_unlock(&ci->i_ceph_lock);
	return cap;
}

/*
 * Called under i_ceph_lock.
 */
static void __insert_cap_node(struct ceph_inode_info *ci,
			      struct ceph_cap *new)
{
	struct rb_node **p = &ci->i_caps.rb_node;
	struct rb_node *parent = NULL;
	struct ceph_cap *cap = NULL;

	while (*p) {
		parent = *p;
		cap = rb_entry(parent, struct ceph_cap, ci_node);
		if (new->mds < cap->mds)
			p = &(*p)->rb_left;
		else if (new->mds > cap->mds)
			p = &(*p)->rb_right;
		else
			BUG();
	}

	rb_link_node(&new->ci_node, parent, p);
	rb_insert_color(&new->ci_node, &ci->i_caps);
}

/*
 * (re)set cap hold timeouts, which control the delayed release
 * of unused caps back to the MDS.  Should be called on cap use.
 */
static void __cap_set_timeouts(struct ceph_mds_client *mdsc,
			       struct ceph_inode_info *ci)
{
	struct ceph_mount_options *opt = mdsc->fsc->mount_options;

	ci->i_hold_caps_max = round_jiffies(jiffies +
					    opt->caps_wanted_delay_max * HZ);
	dout("__cap_set_timeouts %p %lu\n", &ci->vfs_inode,
	     ci->i_hold_caps_max - jiffies);
}

/*
 * (Re)queue cap at the end of the delayed cap release list.
 *
 * If I_FLUSH is set, leave the inode at the front of the list.
 *
 * Caller holds i_ceph_lock
 *  -> we take mdsc->cap_delay_lock
 */
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
				struct ceph_inode_info *ci)
{
	dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->vfs_inode,
	     ci->i_ceph_flags, ci->i_hold_caps_max);
	if (!mdsc->stopping) {
		spin_lock(&mdsc->cap_delay_lock);
		if (!list_empty(&ci->i_cap_delay_list)) {
			if (ci->i_ceph_flags & CEPH_I_FLUSH)
				goto no_change;
			list_del_init(&ci->i_cap_delay_list);
		}
		__cap_set_timeouts(mdsc, ci);
		list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
no_change:
		spin_unlock(&mdsc->cap_delay_lock);
	}
}

/*
 * Queue an inode for immediate writeback.  Mark inode with I_FLUSH,
 * indicating we should send a cap message to flush dirty metadata
 * asap, and move to the front of the delayed cap list.
 */
static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
				      struct ceph_inode_info *ci)
{
	dout("__cap_delay_requeue_front %p\n", &ci->vfs_inode);
	spin_lock(&mdsc->cap_delay_lock);
	ci->i_ceph_flags |= CEPH_I_FLUSH;
	if (!list_empty(&ci->i_cap_delay_list))
		list_del_init(&ci->i_cap_delay_list);
	list_add(&ci->i_cap_delay_list, &mdsc->cap_delay_list);
	spin_unlock(&mdsc->cap_delay_lock);
}

/*
 * Cancel delayed work on cap.
 *
 * Caller must hold i_ceph_lock.
 */
static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
			       struct ceph_inode_info *ci)
{
	dout("__cap_delay_cancel %p\n", &ci->vfs_inode);
	if (list_empty(&ci->i_cap_delay_list))
		return;
	spin_lock(&mdsc->cap_delay_lock);
	list_del_init(&ci->i_cap_delay_list);
	spin_unlock(&mdsc->cap_delay_lock);
}

/* Common issue checks for add_cap, handle_cap_grant. */
static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap,
			      unsigned issued)
{
	unsigned had = __ceph_caps_issued(ci, NULL);

	lockdep_assert_held(&ci->i_ceph_lock);

	/*
	 * Each time we receive FILE_CACHE anew, we increment
	 * i_rdcache_gen.
	 */
	if (S_ISREG(ci->vfs_inode.i_mode) &&
	    (issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
	    (had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
		ci->i_rdcache_gen++;
	}

	/*
	 * If FILE_SHARED is newly issued, mark dir not complete.  We don't
	 * know what happened to this directory while we didn't have the cap.
	 * If FILE_SHARED is being revoked, also mark dir not complete.  This
	 * stops ongoing cached readdirs.
	 */
	if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) {
		if (issued & CEPH_CAP_FILE_SHARED)
			atomic_inc(&ci->i_shared_gen);
		if (S_ISDIR(ci->vfs_inode.i_mode)) {
			dout(" marking %p NOT complete\n", &ci->vfs_inode);
			__ceph_dir_clear_complete(ci);
		}
	}

	/* Wipe saved layout if we're losing DIR_CREATE caps */
	if (S_ISDIR(ci->vfs_inode.i_mode) && (had & CEPH_CAP_DIR_CREATE) &&
	    !(issued & CEPH_CAP_DIR_CREATE)) {
		ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns));
		memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout));
	}
}

/**
 * change_auth_cap_ses - move inode to appropriate lists when auth caps change
 * @ci: inode to be moved
 * @session: new auth caps session
 */
static void change_auth_cap_ses(struct ceph_inode_info *ci,
				struct ceph_mds_session *session)
{
	lockdep_assert_held(&ci->i_ceph_lock);

	if (list_empty(&ci->i_dirty_item) && list_empty(&ci->i_flushing_item))
		return;

	spin_lock(&session->s_mdsc->cap_dirty_lock);
	if (!list_empty(&ci->i_dirty_item))
		list_move(&ci->i_dirty_item, &session->s_cap_dirty);
	if (!list_empty(&ci->i_flushing_item))
		list_move_tail(&ci->i_flushing_item, &session->s_cap_flushing);
	spin_unlock(&session->s_mdsc->cap_dirty_lock);
}

/*
 * Add a capability under the given MDS session.
 *
 * Caller should hold session snap_rwsem (read) and ci->i_ceph_lock.
 */
void ceph_add_cap(struct inode *inode,
		  struct ceph_mds_session *session, u64 cap_id,
		  unsigned issued, unsigned wanted,
		  unsigned seq, unsigned mseq, u64 realmino, int flags,
		  struct ceph_cap **new_cap)
{
	struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
	struct ceph_inode_info *ci = ceph_inode(inode);
	struct ceph_cap *cap;
	int mds = session->s_mds;
	int actual_wanted;
	u32 gen;

	lockdep_assert_held(&ci->i_ceph_lock);

	dout("add_cap %p mds%d cap %llx %s seq %d\n", inode,
	     session->s_mds, cap_id, ceph_cap_string(issued), seq);

	spin_lock(&session->s_gen_ttl_lock);
	gen = session->s_cap_gen;
	spin_unlock(&session->s_gen_ttl_lock);

	cap = __get_cap_for_mds(ci, mds);
	if (!cap) {
		cap = *new_cap;
		*new_cap = NULL;

		cap->issued = 0;
		cap->implemented = 0;
		cap->mds = mds;
		cap->mds_wanted = 0;
		cap->mseq = 0;

		cap->ci = ci;
		__insert_cap_node(ci, cap);

		/* add to session cap list */
		cap->session = session;
		spin_lock(&session->s_cap_lock);
		list_add_tail(&cap->session_caps, &session->s_caps);
		session->s_nr_caps++;
		atomic64_inc(&mdsc->metric.total_caps);
		spin_unlock(&session->s_cap_lock);
	} else {
		spin_lock(&session->s_cap_lock);
		list_move_tail(&cap->session_caps, &session->s_caps);
		spin_unlock(&session->s_cap_lock);

		if (cap->cap_gen < gen)
			cap->issued = cap->implemented = CEPH_CAP_PIN;

		/*
		 * The auth MDS of the inode changed.  We received the cap
		 * export message, but still haven't received the cap import
		 * message.  handle_cap_export() updated the new auth MDS' cap.
		 *
		 * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing
		 * a message that was sent before the cap import message, so
		 * don't remove caps.
		 */
		if (ceph_seq_cmp(seq, cap->seq) <= 0) {
			WARN_ON(cap != ci->i_auth_cap);
			WARN_ON(cap->cap_id != cap_id);
			seq = cap->seq;
			mseq = cap->mseq;
			issued |= cap->issued;
			flags |= CEPH_CAP_FLAG_AUTH;
		}
	}

	if (!ci->i_snap_realm ||
	    ((flags & CEPH_CAP_FLAG_AUTH) &&
	     realmino != (u64)-1 && ci->i_snap_realm->ino != realmino)) {
		/*
		 * add this inode to the appropriate snap realm
		 */
		struct ceph_snap_realm *realm = ceph_lookup_snap_realm(mdsc,
								       realmino);
		if (realm) {
			struct ceph_snap_realm *oldrealm = ci->i_snap_realm;
			if (oldrealm) {
				spin_lock(&oldrealm->inodes_with_caps_lock);
				list_del_init(&ci->i_snap_realm_item);
				spin_unlock(&oldrealm->inodes_with_caps_lock);
			}

			spin_lock(&realm->inodes_with_caps_lock);
			list_add(&ci->i_snap_realm_item,
				 &realm->inodes_with_caps);
			ci->i_snap_realm = realm;
			if (realm->ino == ci->i_vino.ino)
				realm->inode = inode;
			spin_unlock(&realm->inodes_with_caps_lock);

			if (oldrealm)
				ceph_put_snap_realm(mdsc, oldrealm);
		} else {
			pr_err("ceph_add_cap: couldn't find snap realm %llx\n",
			       realmino);
			WARN_ON(!realm);
		}
	}

	__check_cap_issue(ci, cap, issued);

	/*
	 * If we are issued caps we don't want, or the mds' wanted
	 * value appears to be off, queue a check so we'll release
	 * later and/or update the mds wanted value.
	 */
	actual_wanted = __ceph_caps_wanted(ci);
	if ((wanted & ~actual_wanted) ||
	    (issued & ~actual_wanted & CEPH_CAP_ANY_WR)) {
		dout(" issued %s, mds wanted %s, actual %s, queueing\n",
		     ceph_cap_string(issued), ceph_cap_string(wanted),
		     ceph_cap_string(actual_wanted));
		__cap_delay_requeue(mdsc, ci);
	}

	if (flags & CEPH_CAP_FLAG_AUTH) {
		if (!ci->i_auth_cap ||
		    ceph_seq_cmp(ci->i_auth_cap->mseq, mseq) < 0) {
			if (ci->i_auth_cap &&
			    ci->i_auth_cap->session != cap->session)
				change_auth_cap_ses(ci, cap->session);
			ci->i_auth_cap = cap;
			cap->mds_wanted = wanted;
		}
	} else {
		WARN_ON(ci->i_auth_cap == cap);
	}

	dout("add_cap inode %p (%llx.%llx) cap %p %s now %s seq %d mds%d\n",
	     inode, ceph_vinop(inode), cap, ceph_cap_string(issued),
	     ceph_cap_string(issued|cap->issued), seq, mds);
	cap->cap_id = cap_id;
	cap->issued = issued;
	cap->implemented |= issued;
	if (ceph_seq_cmp(mseq, cap->mseq) > 0)
		cap->mds_wanted = wanted;
	else
		cap->mds_wanted |= wanted;
	cap->seq = seq;
	cap->issue_seq = seq;
	cap->mseq = mseq;
	cap->cap_gen = gen;
}

/*
 * Return true if cap has not timed out and belongs to the current
 * generation of the MDS session (i.e. has not gone 'stale' due to
 * us losing touch with the mds).
 */
static int __cap_is_valid(struct ceph_cap *cap)
{
	unsigned long ttl;
	u32 gen;

	spin_lock(&cap->session->s_gen_ttl_lock);
	gen = cap->session->s_cap_gen;
	ttl = cap->session->s_cap_ttl;
	spin_unlock(&cap->session->s_gen_ttl_lock);

	if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
		dout("__cap_is_valid %p cap %p issued %s "
		     "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode,
		     cap, ceph_cap_string(cap->issued), cap->cap_gen, gen);
		return 0;
	}

	return 1;
}

/*
 * Return set of valid cap bits issued to us.  Note that caps time
 * out, and may be invalidated in bulk if the client session times out
 * and session->s_cap_gen is bumped.
 */
int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented)
{
	int have = ci->i_snap_caps;
	struct ceph_cap *cap;
	struct rb_node *p;

	if (implemented)
		*implemented = 0;
	for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
		cap = rb_entry(p, struct ceph_cap, ci_node);
		if (!__cap_is_valid(cap))
			continue;
		dout("__ceph_caps_issued %p cap %p issued %s\n",
		     &ci->vfs_inode, cap, ceph_cap_string(cap->issued));
		have |= cap->issued;
		if (implemented)
			*implemented |= cap->implemented;
	}
	/*
	 * Exclude caps issued by non-auth MDSes that are being revoked
	 * by the auth MDS. The non-auth MDSes should also be revoking or
	 * exporting these caps, but their messages may be delayed.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) if (ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) have &= ~cap->implemented | cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) return have;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) * Get cap bits issued by caps other than @ocap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) int __ceph_caps_issued_other(struct ceph_inode_info *ci, struct ceph_cap *ocap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) int have = ci->i_snap_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) cap = rb_entry(p, struct ceph_cap, ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) if (cap == ocap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) if (!__cap_is_valid(cap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) have |= cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) return have;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) * Move a cap to the end of the LRU (oldest caps at list head, newest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) * at list tail).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) static void __touch_cap(struct ceph_cap *cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) struct ceph_mds_session *s = cap->session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) spin_lock(&s->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) if (!s->s_cap_iterator) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) list_move_tail(&cap->session_caps, &s->s_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) &cap->ci->vfs_inode, cap, s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) spin_unlock(&s->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
/*
 * Check if we hold the given mask.  If so, touch the cap(s), moving
 * them to the tail (most recently used end) of their respective LRUs.
 * (This is the preferred way for callers to check for caps they want.)
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) int __ceph_caps_issued_mask(struct ceph_inode_info *ci, int mask, int touch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) int have = ci->i_snap_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) if ((have & mask) == mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) dout("__ceph_caps_issued_mask ino 0x%llx snap issued %s"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) " (mask %s)\n", ceph_ino(&ci->vfs_inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) ceph_cap_string(have),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) ceph_cap_string(mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) cap = rb_entry(p, struct ceph_cap, ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (!__cap_is_valid(cap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) if ((cap->issued & mask) == mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) dout("__ceph_caps_issued_mask ino 0x%llx cap %p issued %s"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) " (mask %s)\n", ceph_ino(&ci->vfs_inode), cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) ceph_cap_string(cap->issued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) ceph_cap_string(mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) if (touch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) __touch_cap(cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) /* does a combination of caps satisfy mask? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) have |= cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) if ((have & mask) == mask) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) dout("__ceph_caps_issued_mask ino 0x%llx combo issued %s"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) " (mask %s)\n", ceph_ino(&ci->vfs_inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) ceph_cap_string(cap->issued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) ceph_cap_string(mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) if (touch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) struct rb_node *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) /* touch this + preceding caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) __touch_cap(cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) for (q = rb_first(&ci->i_caps); q != p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) q = rb_next(q)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) cap = rb_entry(q, struct ceph_cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) if (!__cap_is_valid(cap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) if (cap->issued & mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) __touch_cap(cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) int touch)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) r = __ceph_caps_issued_mask(ci, mask, touch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) if (r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) ceph_update_cap_hit(&fsc->mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) ceph_update_cap_mis(&fsc->mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953)
/*
 * Return true if any of the mask caps are currently being revoked by
 * an MDS via a cap other than @ocap.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) int __ceph_caps_revoking_other(struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) struct ceph_cap *ocap, int mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) cap = rb_entry(p, struct ceph_cap, ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) if (cap != ocap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) (cap->implemented & ~cap->issued & mask))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) ret = __ceph_caps_revoking_other(ci, NULL, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) dout("ceph_caps_revoking %p %s = %d\n", inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) ceph_cap_string(mask), ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984)
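/*
 * Return the set of caps currently in active use, based on the inode's
 * pin/read/write/buffer reference counts.
 */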
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) int __ceph_caps_used(struct ceph_inode_info *ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) int used = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) if (ci->i_pin_ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) used |= CEPH_CAP_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) if (ci->i_rd_ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) used |= CEPH_CAP_FILE_RD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) if (ci->i_rdcache_ref ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) (S_ISREG(ci->vfs_inode.i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) ci->vfs_inode.i_data.nrpages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) used |= CEPH_CAP_FILE_CACHE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) if (ci->i_wr_ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) used |= CEPH_CAP_FILE_WR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) if (ci->i_wb_ref || ci->i_wrbuffer_ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) used |= CEPH_CAP_FILE_BUFFER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) if (ci->i_fx_ref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) used |= CEPH_CAP_FILE_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) return used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004)
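/*
 * Open-mode refcounts at or above this bias are treated as actively in
 * use by __ceph_caps_file_wanted(), regardless of the last-use cutoffs.
 */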
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) #define FMODE_WAIT_BIAS 1000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * wanted, by virtue of open file modes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) const int PIN_SHIFT = ffs(CEPH_FILE_MODE_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) const int RD_SHIFT = ffs(CEPH_FILE_MODE_RD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) struct ceph_mount_options *opt =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) ceph_inode_to_client(&ci->vfs_inode)->mount_options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) if (S_ISDIR(ci->vfs_inode.i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) int want = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) /* use used_cutoff here, to keep dir's wanted caps longer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) if (ci->i_nr_by_mode[RD_SHIFT] > 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) time_after(ci->i_last_rd, used_cutoff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) want |= CEPH_CAP_ANY_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) if (ci->i_nr_by_mode[WR_SHIFT] > 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) time_after(ci->i_last_wr, used_cutoff)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) if (opt->flags & CEPH_MOUNT_OPT_ASYNC_DIROPS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) want |= CEPH_CAP_ANY_DIR_OPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) if (want || ci->i_nr_by_mode[PIN_SHIFT] > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) want |= CEPH_CAP_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) return want;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) int bits = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042)
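		/*
		 * Accumulate CEPH_FILE_MODE_* flags shifted up by one (the
		 * *_SHIFT values come from ffs(), which is 1-based); they are
		 * shifted back down before the ceph_caps_for_mode() call below.
		 */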
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) if (ci->i_nr_by_mode[RD_SHIFT] > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) if (ci->i_nr_by_mode[RD_SHIFT] >= FMODE_WAIT_BIAS ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) time_after(ci->i_last_rd, used_cutoff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) bits |= 1 << RD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) } else if (time_after(ci->i_last_rd, idle_cutoff)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) bits |= 1 << RD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) if (ci->i_nr_by_mode[WR_SHIFT] > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) if (ci->i_nr_by_mode[WR_SHIFT] >= FMODE_WAIT_BIAS ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) time_after(ci->i_last_wr, used_cutoff))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) bits |= 1 << WR_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) } else if (time_after(ci->i_last_wr, idle_cutoff)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) bits |= 1 << WR_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) /* check lazyio only when read/write is wanted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) if ((bits & (CEPH_FILE_MODE_RDWR << 1)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) ci->i_nr_by_mode[LAZY_SHIFT] > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) bits |= 1 << LAZY_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) return bits ? ceph_caps_for_mode(bits >> 1) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) * wanted, by virtue of open file modes AND cap refs (buffered/cached data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) int __ceph_caps_wanted(struct ceph_inode_info *ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) if (S_ISDIR(ci->vfs_inode.i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) /* we want EXCL if holding caps of dir ops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) if (w & CEPH_CAP_ANY_DIR_OPS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) w |= CEPH_CAP_FILE_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) /* we want EXCL if dirty data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) if (w & CEPH_CAP_FILE_BUFFER)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) w |= CEPH_CAP_FILE_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) return w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) * Return caps we have registered with the MDS(s) as 'wanted'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) int mds_wanted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) cap = rb_entry(p, struct ceph_cap, ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) if (check && !__cap_is_valid(cap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) continue;
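		/* only the auth cap's 'wanted' may include file write caps */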
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (cap == ci->i_auth_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) mds_wanted |= cap->mds_wanted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) mds_wanted |= (cap->mds_wanted & ~CEPH_CAP_ANY_FILE_WR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) return mds_wanted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) int ceph_is_any_caps(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) ret = __ceph_is_any_real_caps(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
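/*
 * Detach the inode from its snap realm and drop our reference to the realm.
 */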
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) static void drop_inode_snap_realm(struct ceph_inode_info *ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) struct ceph_snap_realm *realm = ci->i_snap_realm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) spin_lock(&realm->inodes_with_caps_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) list_del_init(&ci->i_snap_realm_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) ci->i_snap_realm_counter++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) ci->i_snap_realm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) if (realm->ino == ci->i_vino.ino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) realm->inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) spin_unlock(&realm->inodes_with_caps_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) ceph_put_snap_realm(ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) realm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) * Remove a cap. Take steps to deal with a racing iterate_session_caps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) * caller should hold i_ceph_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) * caller will not hold session s_mutex if called from destroy_inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) struct ceph_mds_session *session = cap->session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) struct ceph_inode_info *ci = cap->ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) struct ceph_mds_client *mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) int removed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145)
	/* 'ci' being NULL means the removal has already occurred */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) if (!ci) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) dout("%s: cap inode is NULL\n", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) /* remove from inode's cap rbtree, and clear auth cap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) rb_erase(&cap->ci_node, &ci->i_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) if (ci->i_auth_cap == cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) WARN_ON_ONCE(!list_empty(&ci->i_dirty_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) ci->i_auth_cap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) /* remove from session list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) if (session->s_cap_iterator == cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) /* not yet, we are iterating over this very cap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) dout("__ceph_remove_cap delaying %p removal from session %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) cap, cap->session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) list_del_init(&cap->session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) session->s_nr_caps--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) atomic64_dec(&mdsc->metric.total_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) cap->session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) removed = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) /* protect backpointer with s_cap_lock: see iterate_session_caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) cap->ci = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
	/*
	 * s_cap_reconnect is protected by s_cap_lock. No one changes
	 * s_cap_gen while the session is in the reconnect state.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) if (queue_release &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) (!session->s_cap_reconnect || cap->cap_gen == session->s_cap_gen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) cap->queue_release = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) if (removed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) __ceph_queue_cap_release(session, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) removed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) cap->queue_release = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) }
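	/* record the inode number separately, since cap->ci was cleared above */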
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) cap->cap_ino = ci->i_vino.ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) if (removed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) ceph_put_cap(mdsc, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) if (!__ceph_is_any_real_caps(ci)) {
		/* When a reconnect is denied, we remove session caps
		 * forcibly and i_wr_ref can be non-zero. If there are
		 * ongoing writes, keep i_snap_realm.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) if (ci->i_wr_ref == 0 && ci->i_snap_realm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) drop_inode_snap_realm(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) __cap_delay_cancel(mdsc, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
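/*
 * Everything needed to build and send one CEPH_MSG_CLIENT_CAPS message,
 * captured under i_ceph_lock so the message can be encoded without it.
 */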
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) struct cap_msg_args {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) struct ceph_mds_session *session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) u64 ino, cid, follows;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) u64 flush_tid, oldest_flush_tid, size, max_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) u64 xattr_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) u64 change_attr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) struct ceph_buffer *xattr_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) struct ceph_buffer *old_xattr_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) struct timespec64 atime, mtime, ctime, btime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) int op, caps, wanted, dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) u32 seq, issue_seq, mseq, time_warp_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) u32 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) kuid_t uid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) kgid_t gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) umode_t mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) bool inline_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) bool wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230)
/*
 * cap struct size + flock buffer size (4) + inline version (8) +
 * inline data size (4) + osd_epoch_barrier (4) + oldest_flush_tid (8) +
 * caller_uid/caller_gid (4 + 4) + pool namespace (4) + btime (8) +
 * change_attr (8) + advisory flags (4)
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) #define CAP_MSG_SIZE (sizeof(struct ceph_mds_caps) + \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 4 + 8 + 4 + 4 + 8 + 4 + 4 + 4 + 8 + 8 + 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) /* Marshal up the cap msg to the MDS */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) static void encode_cap_msg(struct ceph_msg *msg, struct cap_msg_args *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) struct ceph_mds_caps *fc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) struct ceph_osd_client *osdc = &arg->session->s_mdsc->fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) dout("%s %s %llx %llx caps %s wanted %s dirty %s seq %u/%u tid %llu/%llu mseq %u follows %lld size %llu/%llu xattr_ver %llu xattr_len %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) __func__, ceph_cap_op_name(arg->op), arg->cid, arg->ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) ceph_cap_string(arg->caps), ceph_cap_string(arg->wanted),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) ceph_cap_string(arg->dirty), arg->seq, arg->issue_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) arg->flush_tid, arg->oldest_flush_tid, arg->mseq, arg->follows,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) arg->size, arg->max_size, arg->xattr_version,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) arg->xattr_buf ? (int)arg->xattr_buf->vec.iov_len : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) msg->hdr.version = cpu_to_le16(10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) msg->hdr.tid = cpu_to_le64(arg->flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) fc = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) memset(fc, 0, sizeof(*fc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) fc->cap_id = cpu_to_le64(arg->cid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) fc->op = cpu_to_le32(arg->op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) fc->seq = cpu_to_le32(arg->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) fc->issue_seq = cpu_to_le32(arg->issue_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) fc->migrate_seq = cpu_to_le32(arg->mseq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) fc->caps = cpu_to_le32(arg->caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) fc->wanted = cpu_to_le32(arg->wanted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) fc->dirty = cpu_to_le32(arg->dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) fc->ino = cpu_to_le64(arg->ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) fc->snap_follows = cpu_to_le64(arg->follows);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) fc->size = cpu_to_le64(arg->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) fc->max_size = cpu_to_le64(arg->max_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) ceph_encode_timespec64(&fc->mtime, &arg->mtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) ceph_encode_timespec64(&fc->atime, &arg->atime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) ceph_encode_timespec64(&fc->ctime, &arg->ctime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) fc->time_warp_seq = cpu_to_le32(arg->time_warp_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) fc->uid = cpu_to_le32(from_kuid(&init_user_ns, arg->uid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) fc->gid = cpu_to_le32(from_kgid(&init_user_ns, arg->gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) fc->mode = cpu_to_le32(arg->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) fc->xattr_version = cpu_to_le64(arg->xattr_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) if (arg->xattr_buf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) msg->middle = ceph_buffer_get(arg->xattr_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) fc->xattr_len = cpu_to_le32(arg->xattr_buf->vec.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) msg->hdr.middle_len = cpu_to_le32(arg->xattr_buf->vec.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) p = fc + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) /* flock buffer size (version 2) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) ceph_encode_32(&p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) /* inline version (version 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) ceph_encode_64(&p, arg->inline_data ? 0 : CEPH_INLINE_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) /* inline data size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) ceph_encode_32(&p, 0);
	/*
	 * osd_epoch_barrier (version 5)
	 * The epoch_barrier is protected by osdc->lock, so use READ_ONCE
	 * here in case it was recently changed.
	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) ceph_encode_32(&p, READ_ONCE(osdc->epoch_barrier));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) /* oldest_flush_tid (version 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) ceph_encode_64(&p, arg->oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) * caller_uid/caller_gid (version 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) * Currently, we don't properly track which caller dirtied the caps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) * last, and force a flush of them when there is a conflict. For now,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) * just set this to 0:0, to emulate how the MDS has worked up to now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) ceph_encode_32(&p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) ceph_encode_32(&p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) /* pool namespace (version 8) (mds always ignores this) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) ceph_encode_32(&p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) /* btime and change_attr (version 9) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) ceph_encode_timespec64(p, &arg->btime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) p += sizeof(struct ceph_timespec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) ceph_encode_64(&p, arg->change_attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) /* Advisory flags (version 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) ceph_encode_32(&p, arg->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) * Queue cap releases when an inode is dropped from our cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) void __ceph_remove_caps(struct ceph_inode_info *ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) /* lock i_ceph_lock, because ceph_d_revalidate(..., LOOKUP_RCU)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) * may call __ceph_caps_issued_mask() on a freeing inode. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) p = rb_first(&ci->i_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) struct ceph_cap *cap = rb_entry(p, struct ceph_cap, ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) __ceph_remove_cap(cap, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)
/*
 * Prepare to send a cap message to an MDS. Update the cap state, and populate
 * the arg struct with the parameters that will need to be sent. This should
 * be done under the i_ceph_lock to guard against changes to cap state.
 *
 * Make note of the max_size reported/requested from the MDS, and update the
 * issued/implemented masks to reflect any revocations that are now complete.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) static void __prep_cap(struct cap_msg_args *arg, struct ceph_cap *cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) int op, int flags, int used, int want, int retain,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) int flushing, u64 flush_tid, u64 oldest_flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) struct ceph_inode_info *ci = cap->ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) int held, revoking;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) held = cap->issued | cap->implemented;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) revoking = cap->implemented & ~cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) retain &= ~revoking;
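	/* never ask to retain caps that are in the middle of being revoked */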
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) dout("%s %p cap %p session %p %s -> %s (revoking %s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) __func__, inode, cap, cap->session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) ceph_cap_string(held), ceph_cap_string(held & retain),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) ceph_cap_string(revoking));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) BUG_ON((retain & CEPH_CAP_PIN) == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) ci->i_ceph_flags &= ~CEPH_I_FLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) cap->issued &= retain; /* drop bits we don't want */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) * Wake up any waiters on wanted -> needed transition. This is due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) * the weird transition from buffered to sync IO... we need to flush
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) * dirty pages _before_ allowing sync writes to avoid reordering.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) arg->wake = cap->implemented & ~cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) cap->implemented &= cap->issued | used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) cap->mds_wanted = want;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) arg->session = cap->session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) arg->ino = ceph_vino(inode).ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) arg->cid = cap->cap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) arg->follows = flushing ? ci->i_head_snapc->seq : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) arg->flush_tid = flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) arg->oldest_flush_tid = oldest_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) arg->size = inode->i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) ci->i_reported_size = arg->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) arg->max_size = ci->i_wanted_max_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) if (cap == ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) if (want & CEPH_CAP_ANY_FILE_WR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) ci->i_requested_max_size = arg->max_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) ci->i_requested_max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) if (flushing & CEPH_CAP_XATTR_EXCL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) arg->old_xattr_buf = __ceph_build_xattrs_blob(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) arg->xattr_version = ci->i_xattrs.version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) arg->xattr_buf = ci->i_xattrs.blob;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) arg->xattr_buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) arg->old_xattr_buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) arg->mtime = inode->i_mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) arg->atime = inode->i_atime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) arg->ctime = inode->i_ctime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) arg->btime = ci->i_btime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) arg->change_attr = inode_peek_iversion_raw(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) arg->op = op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) arg->caps = cap->implemented;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) arg->wanted = want;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) arg->dirty = flushing;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) arg->seq = cap->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) arg->issue_seq = cap->issue_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) arg->mseq = cap->mseq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) arg->time_warp_seq = ci->i_time_warp_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) arg->uid = inode->i_uid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) arg->gid = inode->i_gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) arg->mode = inode->i_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) arg->inline_data = ci->i_inline_version != CEPH_INLINE_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) if (!(flags & CEPH_CLIENT_CAPS_PENDING_CAPSNAP) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) !list_empty(&ci->i_cap_snaps)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) struct ceph_cap_snap *capsnap;
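		/*
		 * If an unflushed capsnap still needs flushing, tell the MDS
		 * that a FLUSHSNAP for this inode is pending.
		 */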
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) list_for_each_entry_reverse(capsnap, &ci->i_cap_snaps, ci_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) if (capsnap->cap_flush.tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) if (capsnap->need_flush) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) flags |= CEPH_CLIENT_CAPS_PENDING_CAPSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) arg->flags = flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) * Send a cap msg on the given inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) * Caller should hold snap_rwsem (read), s_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) pr_err("error allocating cap msg: ino (%llx.%llx) flushing %s tid %llu, requeuing cap.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) ceph_vinop(inode), ceph_cap_string(arg->dirty),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) arg->flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) __cap_delay_requeue(arg->session->s_mdsc, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) encode_cap_msg(msg, arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) ceph_con_send(&arg->session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) ceph_buffer_put(arg->old_xattr_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) if (arg->wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) static inline int __send_flush_snap(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) struct ceph_cap_snap *capsnap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) u32 mseq, u64 oldest_flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) struct cap_msg_args arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) arg.session = session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) arg.ino = ceph_vino(inode).ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) arg.cid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) arg.follows = capsnap->follows;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) arg.flush_tid = capsnap->cap_flush.tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) arg.oldest_flush_tid = oldest_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) arg.size = capsnap->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) arg.max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) arg.xattr_version = capsnap->xattr_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) arg.xattr_buf = capsnap->xattr_blob;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) arg.old_xattr_buf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) arg.atime = capsnap->atime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) arg.mtime = capsnap->mtime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) arg.ctime = capsnap->ctime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) arg.btime = capsnap->btime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) arg.change_attr = capsnap->change_attr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) arg.op = CEPH_CAP_OP_FLUSHSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) arg.caps = capsnap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) arg.wanted = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) arg.dirty = capsnap->dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) arg.seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) arg.issue_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) arg.mseq = mseq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) arg.time_warp_seq = capsnap->time_warp_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) arg.uid = capsnap->uid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) arg.gid = capsnap->gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) arg.mode = capsnap->mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) arg.inline_data = capsnap->inline_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) arg.flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) arg.wake = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) encode_cap_msg(msg, &arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) ceph_con_send(&arg.session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) * When a snapshot is taken, clients accumulate dirty metadata on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) * inodes with capabilities in ceph_cap_snaps to describe the file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) * state at the time the snapshot was taken. This must be flushed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) * asynchronously back to the MDS once sync writes complete and dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) * data is written out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * Called under i_ceph_lock. Takes s_mutex as needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) static void __ceph_flush_snaps(struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) __releases(ci->i_ceph_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) __acquires(ci->i_ceph_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) struct ceph_mds_client *mdsc = session->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) struct ceph_cap_snap *capsnap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) u64 oldest_flush_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) u64 first_tid = 1, last_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) dout("__flush_snaps %p session %p\n", inode, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) * we need to wait for sync writes to complete and for dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) * pages to be written out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) if (capsnap->dirty_pages || capsnap->writing)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) /* should be removed by ceph_try_drop_cap_snap() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) BUG_ON(!capsnap->need_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) /* only flush each capsnap once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) if (capsnap->cap_flush.tid > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) dout(" already flushed %p, skipping\n", capsnap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) capsnap->cap_flush.tid = ++mdsc->last_cap_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) list_add_tail(&capsnap->cap_flush.g_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) &mdsc->cap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) if (oldest_flush_tid == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) oldest_flush_tid = __get_oldest_flush_tid(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) if (list_empty(&ci->i_flushing_item)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) list_add_tail(&ci->i_flushing_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) &session->s_cap_flushing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) list_add_tail(&capsnap->cap_flush.i_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) &ci->i_cap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) if (first_tid == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) first_tid = capsnap->cap_flush.tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) last_tid = capsnap->cap_flush.tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) ci->i_ceph_flags &= ~CEPH_I_FLUSH_SNAPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589)
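/*
 * Now send a flushsnap message for each capsnap queued above, in tid
 * order.  i_ceph_lock is dropped while each message is built and sent,
 * so re-find our position in i_cap_flush_list by tid on every pass.
 */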
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) while (first_tid <= last_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) struct ceph_cap *cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) struct ceph_cap_flush *cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) if (!(cap && cap->session == session)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) dout("__flush_snaps %p auth cap %p not mds%d, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) "stop\n", inode, cap, session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) ret = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) if (cf->tid >= first_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) first_tid = cf->tid + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) capsnap = container_of(cf, struct ceph_cap_snap, cap_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) refcount_inc(&capsnap->nref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) dout("__flush_snaps %p capsnap %p tid %llu %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) inode, capsnap, cf->tid, ceph_cap_string(capsnap->dirty));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) pr_err("__flush_snaps: error sending cap flushsnap, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) "ino (%llx.%llx) tid %llu follows %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) ceph_vinop(inode), cf->tid, capsnap->follows);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) ceph_put_cap_snap(capsnap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632)
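/*
 * Flush any capsnaps that are ready on this inode to its auth MDS,
 * taking the session mutex as needed (and, when *psession is provided,
 * handing the still-locked session back to the caller).  Once the
 * flushes have been queued, drop the inode from the mdsc snap_flush
 * list.
 */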
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) void ceph_flush_snaps(struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) struct ceph_mds_session **psession)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) struct ceph_mds_session *session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) int mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) dout("ceph_flush_snaps %p\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) if (psession)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) session = *psession;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) if (!(ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) dout(" no capsnap needs flush, doing nothing\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) if (!ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) dout(" no auth cap (migrating?), doing nothing\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) mds = ci->i_auth_cap->session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) if (session && session->s_mds != mds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) dout(" oops, wrong session %p mutex\n", session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) if (!session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) session = __ceph_lookup_mds_session(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) if (session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) dout(" inverting session/ino locks on %p\n", session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) /* make sure flushsnap messages are sent in proper order. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) __kick_flushing_caps(mdsc, session, ci, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) __ceph_flush_snaps(ci, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) if (psession) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) *psession = session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) } else if (session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) /* we flushed them all; remove this inode from the queue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) spin_lock(&mdsc->snap_flush_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) list_del_init(&ci->i_snap_flush_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) spin_unlock(&mdsc->snap_flush_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) * Mark caps dirty. If inode is newly dirty, return the dirty flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) * Caller is then responsible for calling __mark_inode_dirty with the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) * returned flags value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) int __ceph_mark_dirty_caps(struct ceph_inode_info *ci, int mask,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) struct ceph_cap_flush **pcf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) struct ceph_mds_client *mdsc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) int was = ci->i_dirty_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) int dirty = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) if (!ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) pr_warn("__mark_dirty_caps %p %llx mask %s, "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) "but no auth cap (session was closed?)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) inode, ceph_ino(inode), ceph_cap_string(mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) ceph_cap_string(mask), ceph_cap_string(was),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) ceph_cap_string(was | mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) ci->i_dirty_caps |= mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) if (was == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) struct ceph_mds_session *session = ci->i_auth_cap->session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) WARN_ON_ONCE(ci->i_prealloc_cap_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) swap(ci->i_prealloc_cap_flush, *pcf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) if (!ci->i_head_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) WARN_ON_ONCE(!rwsem_is_locked(&mdsc->snap_rwsem));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) ci->i_head_snapc = ceph_get_snap_context(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) ci->i_snap_realm->cached_context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) dout(" inode %p now dirty snapc %p auth cap %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) BUG_ON(!list_empty(&ci->i_dirty_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) list_add(&ci->i_dirty_item, &session->s_cap_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) if (ci->i_flushing_caps == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) dirty |= I_DIRTY_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) WARN_ON_ONCE(!ci->i_prealloc_cap_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) BUG_ON(list_empty(&ci->i_dirty_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) if (((was | ci->i_flushing_caps) & CEPH_CAP_FILE_BUFFER) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) (mask & CEPH_CAP_FILE_BUFFER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) dirty |= I_DIRTY_DATASYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) __cap_delay_requeue(mdsc, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) return dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752)
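/*
 * Allocate a ceph_cap_flush from its dedicated slab cache; pair with
 * ceph_free_cap_flush().  Returns NULL if the allocation fails.
 */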
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) struct ceph_cap_flush *ceph_alloc_cap_flush(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) struct ceph_cap_flush *cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) cf = kmem_cache_alloc(ceph_cap_flush_cachep, GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) if (!cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) cf->is_capsnap = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) return cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) void ceph_free_cap_flush(struct ceph_cap_flush *cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) if (cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) kmem_cache_free(ceph_cap_flush_cachep, cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)
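/*
 * Return the tid of the oldest cap flush still in flight (the head of
 * the global cap_flush_list), or 0 if nothing is being flushed.  Callers
 * hold mdsc->cap_dirty_lock around this and the list manipulation.
 */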
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) static u64 __get_oldest_flush_tid(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) if (!list_empty(&mdsc->cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) struct ceph_cap_flush *cf =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) list_first_entry(&mdsc->cap_flush_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) struct ceph_cap_flush, g_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) return cf->tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) * Remove cap_flush from the mdsc's or inode's flushing cap list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) * Return true if caller needs to wake up flush waiters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) static bool __detach_cap_flush_from_mdsc(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) struct ceph_cap_flush *cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) struct ceph_cap_flush *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) bool wake = cf->wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) if (wake && cf->g_list.prev != &mdsc->cap_flush_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) prev = list_prev_entry(cf, g_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) prev->wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) wake = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) list_del_init(&cf->g_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) return wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) static bool __detach_cap_flush_from_ci(struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) struct ceph_cap_flush *cf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) struct ceph_cap_flush *prev;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) bool wake = cf->wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) if (wake && cf->i_list.prev != &ci->i_cap_flush_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) prev = list_prev_entry(cf, i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) prev->wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) wake = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) list_del_init(&cf->i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) return wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) * Add dirty inode to the flushing list. Assign a seq number so we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) * can wait for caps to flush without starving.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) * Called under i_ceph_lock. Returns the flush tid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) static u64 __mark_caps_flushing(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) struct ceph_mds_session *session, bool wake,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) u64 *oldest_flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) struct ceph_cap_flush *cf = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) int flushing;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) BUG_ON(ci->i_dirty_caps == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) BUG_ON(list_empty(&ci->i_dirty_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) BUG_ON(!ci->i_prealloc_cap_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) flushing = ci->i_dirty_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) dout("__mark_caps_flushing flushing %s, flushing_caps %s -> %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) ceph_cap_string(flushing),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) ceph_cap_string(ci->i_flushing_caps),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) ceph_cap_string(ci->i_flushing_caps | flushing));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) ci->i_flushing_caps |= flushing;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) ci->i_dirty_caps = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) dout(" inode %p now !dirty\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) swap(cf, ci->i_prealloc_cap_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) cf->caps = flushing;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) cf->wake = wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) list_del_init(&ci->i_dirty_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) cf->tid = ++mdsc->last_cap_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) list_add_tail(&cf->g_list, &mdsc->cap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) *oldest_flush_tid = __get_oldest_flush_tid(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) if (list_empty(&ci->i_flushing_item)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) list_add_tail(&ci->i_flushing_item, &session->s_cap_flushing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) mdsc->num_cap_flushing++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) list_add_tail(&cf->i_list, &ci->i_cap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) return cf->tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) * try to invalidate mapping pages without blocking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) static int try_nonblocking_invalidate(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) __releases(ci->i_ceph_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) __acquires(ci->i_ceph_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) u32 invalidating_gen = ci->i_rdcache_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) ceph_fscache_invalidate(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) invalidate_mapping_pages(&inode->i_data, 0, -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) if (inode->i_data.nrpages == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) invalidating_gen == ci->i_rdcache_gen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) /* success. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) dout("try_nonblocking_invalidate %p success\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) /* save any racing async invalidate some trouble */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) dout("try_nonblocking_invalidate %p failed\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893)
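/*
 * Report i_size to the MDS once we have hit max_size, or once at least
 * half of the last max_size increment has been consumed -- unless a
 * CEPH_CAP_FILE_WR flush is already in flight, in which case the MDS
 * will adjust max_size based on that flush.
 */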
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) bool __ceph_should_report_size(struct ceph_inode_info *ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) loff_t size = ci->vfs_inode.i_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) /* mds will adjust max size according to the reported size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) if (size >= ci->i_max_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) /* half of previous max_size increment has been used */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) if (ci->i_max_size > ci->i_reported_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) (size << 1) >= ci->i_max_size + ci->i_reported_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) * Swiss army knife function to examine currently used and wanted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) * versus held caps. Release, flush, ack revoked caps to mds as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) * appropriate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) * CHECK_CAPS_AUTHONLY - we should only check the auth cap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) * CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) * further delay.
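* CHECK_CAPS_NOINVAL - we should not try to invalidate the page cache.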
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) void ceph_check_caps(struct ceph_inode_info *ci, int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) u64 flush_tid, oldest_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) int file_wanted, used, cap_used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) int took_snap_rwsem = 0; /* true if mdsc->snap_rwsem held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) int issued, implemented, want, retain, revoking, flushing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) int mds = -1; /* keep track of how far we've gone through i_caps list
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) to avoid an infinite loop on retry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) bool queue_invalidate = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) bool tried_invalidate = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) if (ci->i_ceph_flags & CEPH_I_FLUSH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) flags |= CHECK_CAPS_FLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) goto retry_locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) retry_locked:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) /* Caps wanted by virtue of active open files. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) file_wanted = __ceph_caps_file_wanted(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) /* Caps which have active references against them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) used = __ceph_caps_used(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) * "issued" represents the current caps that the MDS wants us to have.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) * "implemented" is the set that we have been granted, and includes the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) * ones that have not yet been returned to the MDS (the "revoking" set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) * usually because they have outstanding references).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) issued = __ceph_caps_issued(ci, &implemented);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) revoking = implemented & ~issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) want = file_wanted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) /* The ones we currently want to retain (may be adjusted below) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) retain = file_wanted | used | CEPH_CAP_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) if (!mdsc->stopping && inode->i_nlink > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) if (file_wanted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) retain |= CEPH_CAP_ANY; /* be greedy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) } else if (S_ISDIR(inode->i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) (issued & CEPH_CAP_FILE_SHARED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) __ceph_dir_is_complete(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) * If a directory is complete, we want to keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) * the exclusive cap, so that the MDS does not end up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * revoking the shared cap on every create/unlink
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) * operation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) if (IS_RDONLY(inode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) want = CEPH_CAP_ANY_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) want |= CEPH_CAP_ANY_SHARED | CEPH_CAP_FILE_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) retain |= want;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) retain |= CEPH_CAP_ANY_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) * keep RD only if we didn't have the file open RW,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) * because then the mds would revoke it anyway to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) * journal max_size=0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) if (ci->i_max_size == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) retain |= CEPH_CAP_ANY_RD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) dout("check_caps %p file_want %s used %s dirty %s flushing %s"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) " issued %s revoking %s retain %s %s%s\n", inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) ceph_cap_string(file_wanted),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) ceph_cap_string(ci->i_flushing_caps),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) ceph_cap_string(issued), ceph_cap_string(revoking),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) ceph_cap_string(retain),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) (flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) (flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) * If we no longer need to hold onto our old caps, and we may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) * have cached pages, but don't want them, then try to invalidate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) * If we fail, it's because pages are locked.... try again later.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) if ((!(flags & CHECK_CAPS_NOINVAL) || mdsc->stopping) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) S_ISREG(inode->i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) !(ci->i_wb_ref || ci->i_wrbuffer_ref) && /* no dirty pages... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) inode->i_data.nrpages && /* have cached pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) (revoking & (CEPH_CAP_FILE_CACHE|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) CEPH_CAP_FILE_LAZYIO)) && /* or revoking cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) !tried_invalidate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) dout("check_caps trying to invalidate on %p\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) if (try_nonblocking_invalidate(inode) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) dout("check_caps queuing invalidate\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) queue_invalidate = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) ci->i_rdcache_revoking = ci->i_rdcache_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) tried_invalidate = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) goto retry_locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023)
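/*
 * Walk every cap on the inode (the rbtree is ordered by mds).  For each
 * cap, decide whether a release/flush/update message is needed; if so,
 * jump to "ack" below, which may drop locks and restart the scan.
 */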
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) int mflags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) struct cap_msg_args arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) cap = rb_entry(p, struct ceph_cap, ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) /* avoid looping forever */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) if (mds >= cap->mds ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) ((flags & CHECK_CAPS_AUTHONLY) && cap != ci->i_auth_cap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) /* NOTE: no side-effects allowed, until we take s_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) * If we have an auth cap, we don't need to consider any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) * overlapping caps as used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) cap_used = used;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) if (ci->i_auth_cap && cap != ci->i_auth_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) cap_used &= ~ci->i_auth_cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) revoking = cap->implemented & ~cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) dout(" mds%d cap %p used %s issued %s implemented %s revoking %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) cap->mds, cap, ceph_cap_string(cap_used),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) ceph_cap_string(cap->issued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) ceph_cap_string(cap->implemented),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) ceph_cap_string(revoking));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) if (cap == ci->i_auth_cap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) (cap->issued & CEPH_CAP_FILE_WR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) /* request larger max_size from MDS? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) if (ci->i_wanted_max_size > ci->i_max_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) ci->i_wanted_max_size > ci->i_requested_max_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) dout("requesting new max_size\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) goto ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) /* approaching file_max? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) if (__ceph_should_report_size(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) dout("i_size approaching max_size\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) goto ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) /* flush anything dirty? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) if (cap == ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) if ((flags & CHECK_CAPS_FLUSH) && ci->i_dirty_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) dout("flushing dirty caps\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) goto ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) dout("flushing snap caps\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) goto ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) /* completed revocation? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) if (revoking && (revoking & cap_used) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) dout("completed revocation of %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) ceph_cap_string(cap->implemented & ~cap->issued));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) goto ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) /* want more caps from mds? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) if (want & ~cap->mds_wanted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) if (want & ~(cap->mds_wanted | cap->issued))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) goto ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) if (!__cap_is_valid(cap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) goto ack;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) /* things we might delay */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) if ((cap->issued & ~retain) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) continue; /* nope, all good */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) ack:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) if (session && session != cap->session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) dout("oops, wrong session %p mutex\n", session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) if (!session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) session = cap->session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) if (mutex_trylock(&session->s_mutex) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) dout("inverting session/ino locks on %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) session = ceph_get_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) if (took_snap_rwsem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) took_snap_rwsem = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) if (session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) * Because we take the reference while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) * holding the i_ceph_lock, it should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) * never be NULL. Throw a warning if it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) * ever is.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) WARN_ON_ONCE(true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) /* kick flushing and flush snaps before sending normal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) * cap message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) if (cap == ci->i_auth_cap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) (ci->i_ceph_flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) __kick_flushing_caps(mdsc, session, ci, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) __ceph_flush_snaps(ci, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) goto retry_locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) /* take snap_rwsem after session mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) if (!took_snap_rwsem) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) if (down_read_trylock(&mdsc->snap_rwsem) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) dout("inverting snap/in locks on %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) down_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) took_snap_rwsem = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) took_snap_rwsem = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
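/*
 * If this is the auth cap and there are dirty caps, start flushing them
 * now and carry the flush in this cap message.  Request a synchronous
 * ack when the caller asked for a flush and nothing else on the session
 * is still dirty.
 */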
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) if (cap == ci->i_auth_cap && ci->i_dirty_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) flushing = ci->i_dirty_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) flush_tid = __mark_caps_flushing(inode, session, false,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) &oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) if (flags & CHECK_CAPS_FLUSH &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) list_empty(&session->s_cap_dirty))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) mflags |= CEPH_CLIENT_CAPS_SYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) flushing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) flush_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) oldest_flush_tid = __get_oldest_flush_tid(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) mds = cap->mds; /* remember mds, so we don't repeat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) __prep_cap(&arg, cap, CEPH_CAP_OP_UPDATE, mflags, cap_used,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) want, retain, flushing, flush_tid, oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) __send_cap(&arg, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) goto retry; /* retake i_ceph_lock and restart our cap scan. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) /* periodically re-calculate caps wanted by open files */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) if (__ceph_is_any_real_caps(ci) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) list_empty(&ci->i_cap_delay_list) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) (file_wanted & ~CEPH_CAP_PIN) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) !(used & (CEPH_CAP_FILE_RD | CEPH_CAP_ANY_FILE_WR))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) __cap_delay_requeue(mdsc, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) if (queue_invalidate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) ceph_queue_invalidate(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) if (session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) if (took_snap_rwsem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) * Try to flush dirty caps back to the auth mds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) static int try_flush_caps(struct inode *inode, u64 *ptid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) struct ceph_mds_session *session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) int flushing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) u64 flush_tid = 0, oldest_flush_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) retry_locked:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) if (ci->i_dirty_caps && ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) struct ceph_cap *cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) struct cap_msg_args arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) if (session != cap->session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) if (session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) session = cap->session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) if (cap->session->s_state < CEPH_MDS_SESSION_OPEN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) if (ci->i_ceph_flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) (CEPH_I_KICK_FLUSH | CEPH_I_FLUSH_SNAPS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) __kick_flushing_caps(mdsc, session, ci, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) if (ci->i_ceph_flags & CEPH_I_FLUSH_SNAPS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) __ceph_flush_snaps(ci, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) goto retry_locked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) flushing = ci->i_dirty_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) flush_tid = __mark_caps_flushing(inode, session, true,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) &oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH, CEPH_CLIENT_CAPS_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) __ceph_caps_used(ci), __ceph_caps_wanted(ci),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) (cap->issued | cap->implemented),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) flushing, flush_tid, oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) __send_cap(&arg, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) } else {
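/*
 * Nothing newly dirty (or no auth cap).  If earlier flushes are still
 * in flight, flag the most recent one so its completion wakes waiters,
 * and report what is still being flushed.
 */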
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) if (!list_empty(&ci->i_cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) struct ceph_cap_flush *cf =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) list_last_entry(&ci->i_cap_flush_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) struct ceph_cap_flush, i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) cf->wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) flush_tid = cf->tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) flushing = ci->i_flushing_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) if (session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) *ptid = flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) return flushing;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) * Return true if we've flushed caps through the given flush_tid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) static int caps_are_flushed(struct inode *inode, u64 flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) int ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) if (!list_empty(&ci->i_cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) struct ceph_cap_flush *cf =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) list_first_entry(&ci->i_cap_flush_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) struct ceph_cap_flush, i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) if (cf->tid <= flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) * wait for any unsafe requests to complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) static int unsafe_request_wait(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) struct ceph_mds_request *req1 = NULL, *req2 = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) int ret, err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) if (S_ISDIR(inode->i_mode) && !list_empty(&ci->i_unsafe_dirops)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) req1 = list_last_entry(&ci->i_unsafe_dirops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) struct ceph_mds_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) r_unsafe_dir_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) ceph_mdsc_get_request(req1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) if (!list_empty(&ci->i_unsafe_iops)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) req2 = list_last_entry(&ci->i_unsafe_iops,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) struct ceph_mds_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) r_unsafe_target_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) ceph_mdsc_get_request(req2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) dout("unsafe_request_wait %p wait on tid %llu %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) inode, req1 ? req1->r_tid : 0ULL, req2 ? req2->r_tid : 0ULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) if (req1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) ret = !wait_for_completion_timeout(&req1->r_safe_completion,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) ceph_timeout_jiffies(req1->r_timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) ceph_mdsc_put_request(req1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) if (req2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) ret = !wait_for_completion_timeout(&req2->r_safe_completion,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) ceph_timeout_jiffies(req2->r_timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) ceph_mdsc_put_request(req2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334)
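/*
 * fsync: write back and wait on dirty pages, then (unless this is a
 * datasync) flush dirty caps to the auth MDS and wait for any unsafe MDS
 * requests and the resulting cap flushes to be acknowledged.
 */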
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) struct inode *inode = file->f_mapping->host;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) u64 flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) int ret, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) int dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) ret = file_write_and_wait_range(file, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) if (datasync)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) ret = ceph_wait_on_async_create(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) dirty = try_flush_caps(inode, &flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) dout("fsync dirty caps are %s\n", ceph_cap_string(dirty));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) err = unsafe_request_wait(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) * only wait on non-file metadata writeback (the mds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) * can recover size and mtime, so we don't need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) * wait for that)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) if (!err && (dirty & ~CEPH_CAP_ANY_FILE_WR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) err = wait_event_interruptible(ci->i_cap_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) caps_are_flushed(inode, flush_tid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) err = file_check_and_advance_wb_err(file);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) ret = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) dout("fsync %p%s result=%d\n", inode, datasync ? " datasync" : "", ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) * Flush any dirty caps back to the mds. If we aren't asked to wait,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) * queue inode for flush but don't do so immediately, because we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) * get by with fewer MDS messages if we wait for data writeback to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) * complete first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) int ceph_write_inode(struct inode *inode, struct writeback_control *wbc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) u64 flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) int dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) int wait = (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) dout("write_inode %p wait=%d\n", inode, wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) if (wait) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) dirty = try_flush_caps(inode, &flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) if (dirty)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) err = wait_event_interruptible(ci->i_cap_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) caps_are_flushed(inode, flush_tid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) struct ceph_mds_client *mdsc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) ceph_sb_to_client(inode->i_sb)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) if (__ceph_caps_dirty(ci))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) __cap_delay_requeue_front(mdsc, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
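/*
 * Re-send any cap flush (and FLUSHSNAP) messages still pending for this
 * inode on the given session, e.g. after the MDS session has been
 * re-established.  i_ceph_lock is dropped and re-acquired around each
 * message that is sent.
 */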
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) static void __kick_flushing_caps(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) u64 oldest_flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) __releases(ci->i_ceph_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) __acquires(ci->i_ceph_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) struct ceph_cap_flush *cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) u64 first_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) u64 last_snap_flush = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) ci->i_ceph_flags &= ~CEPH_I_KICK_FLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) list_for_each_entry_reverse(cf, &ci->i_cap_flush_list, i_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) if (cf->is_capsnap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) last_snap_flush = cf->tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) list_for_each_entry(cf, &ci->i_cap_flush_list, i_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) if (cf->tid < first_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) if (!(cap && cap->session == session)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) pr_err("%p auth cap %p not mds%d ???\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) inode, cap, session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) first_tid = cf->tid + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) if (!cf->is_capsnap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) struct cap_msg_args arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) dout("kick_flushing_caps %p cap %p tid %llu %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) inode, cap, cf->tid, ceph_cap_string(cf->caps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) __prep_cap(&arg, cap, CEPH_CAP_OP_FLUSH,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) (cf->tid < last_snap_flush ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) CEPH_CLIENT_CAPS_PENDING_CAPSNAP : 0),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) __ceph_caps_used(ci),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) __ceph_caps_wanted(ci),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) (cap->issued | cap->implemented),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) cf->caps, cf->tid, oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) __send_cap(&arg, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) struct ceph_cap_snap *capsnap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) container_of(cf, struct ceph_cap_snap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) cap_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) dout("kick_flushing_caps %p capsnap %p tid %llu %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) inode, capsnap, cf->tid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) ceph_cap_string(capsnap->dirty));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) refcount_inc(&capsnap->nref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) ret = __send_flush_snap(inode, session, capsnap, cap->mseq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) pr_err("kick_flushing_caps: error sending cap flushsnap, ino (%llx.%llx) tid %llu follows %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) ceph_vinop(inode), cf->tid, capsnap->follows);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) ceph_put_cap_snap(capsnap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488)
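/*
 * Early kick of pending cap flushes for a session: if some of the
 * flushing caps have been revoked, re-send those flushes now so the MDS
 * processes them before the reconnect message; otherwise mark the inode
 * with CEPH_I_KICK_FLUSH so ceph_kick_flushing_caps() re-sends them
 * later.
 */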
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) void ceph_early_kick_flushing_caps(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) u64 oldest_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) dout("early_kick_flushing_caps mds%d\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) oldest_flush_tid = __get_oldest_flush_tid(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) if (!(cap && cap->session == session)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) pr_err("%p auth cap %p not mds%d ???\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) &ci->vfs_inode, cap, session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) * if the flushing caps were revoked, we re-send the cap flush
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) * during the client reconnect stage. This guarantees the MDS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) * processes the cap flush message before issuing the flushing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) * caps to other clients.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) if ((cap->issued & ci->i_flushing_caps) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) ci->i_flushing_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) /* encode_caps_cb() will also reset these sequence
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) * numbers. Make sure the sequence numbers in the cap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) * flush message match the later reconnect message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) cap->seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) cap->issue_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) cap->mseq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) __kick_flushing_caps(mdsc, session, ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) ci->i_ceph_flags |= CEPH_I_KICK_FLUSH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536)
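/*
 * Re-send pending cap flushes for every inode on this session's
 * flushing list that is marked CEPH_I_KICK_FLUSH (typically once the
 * session has been re-established).
 */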
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) void ceph_kick_flushing_caps(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) u64 oldest_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) lockdep_assert_held(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) dout("kick_flushing_caps mds%d\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) oldest_flush_tid = __get_oldest_flush_tid(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) list_for_each_entry(ci, &session->s_cap_flushing, i_flushing_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) if (!(cap && cap->session == session)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) pr_err("%p auth cap %p not mds%d ???\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) &ci->vfs_inode, cap, session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) if (ci->i_ceph_flags & CEPH_I_KICK_FLUSH) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) __kick_flushing_caps(mdsc, session, ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568)
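/*
 * Re-send pending cap flushes for a single inode, after moving it onto
 * the auth cap session's flushing list.  The caller must hold
 * i_ceph_lock (see the lockdep assertion below).
 */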
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) void ceph_kick_flushing_inode_caps(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) struct ceph_inode_info *ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) struct ceph_mds_client *mdsc = session->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) struct ceph_cap *cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) dout("%s %p flushing %s\n", __func__, &ci->vfs_inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) ceph_cap_string(ci->i_flushing_caps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) if (!list_empty(&ci->i_cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) u64 oldest_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) list_move_tail(&ci->i_flushing_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) &cap->session->s_cap_flushing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) oldest_flush_tid = __get_oldest_flush_tid(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) __kick_flushing_caps(mdsc, session, ci, oldest_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) * Take references to capabilities we hold, so that we don't release
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) * them to the MDS prematurely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) void ceph_take_cap_refs(struct ceph_inode_info *ci, int got,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) bool snap_rwsem_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) if (got & CEPH_CAP_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) ci->i_pin_ref++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) if (got & CEPH_CAP_FILE_RD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) ci->i_rd_ref++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) if (got & CEPH_CAP_FILE_CACHE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) ci->i_rdcache_ref++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) if (got & CEPH_CAP_FILE_EXCL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) ci->i_fx_ref++;
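/*
 * The first FILE_WR reference pins the head snap context from the
 * inode's snap realm; snap_rwsem must be held for that (hence the
 * BUG_ON below).
 */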
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) if (got & CEPH_CAP_FILE_WR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) if (ci->i_wr_ref == 0 && !ci->i_head_snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) BUG_ON(!snap_rwsem_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) ci->i_head_snapc = ceph_get_snap_context(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) ci->i_snap_realm->cached_context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) ci->i_wr_ref++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) if (got & CEPH_CAP_FILE_BUFFER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) if (ci->i_wb_ref == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) ihold(&ci->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) ci->i_wb_ref++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) dout("%s %p wb %d -> %d (?)\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) * Try to grab cap references. Specify those refs we @want, and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) * minimal set we @need. Also include the larger offset we are writing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) * to (when applicable), and check against max_size here as well.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) * Note that caller is responsible for ensuring max_size increases are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) * requested from the MDS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) * Returns 0 if caps were not able to be acquired (yet), 1 on success,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) * or a negative error code. There are 3 special error codes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) * -EAGAIN: we would need to sleep but non-blocking was specified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) * -EFBIG: ask caller to call check_max_size() and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) * -ESTALE: ask caller to call ceph_renew_caps() and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) /* first 8 bits are reserved for CEPH_FILE_MODE_FOO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) NON_BLOCKING = (1 << 8),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) CHECK_FILELOCK = (1 << 9),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) static int try_get_cap_refs(struct inode *inode, int need, int want,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) loff_t endoff, int flags, int *got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) int have, implemented;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) bool snap_rwsem_locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) dout("get_cap_refs %p need %s want %s\n", inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) ceph_cap_string(need), ceph_cap_string(want));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) if ((flags & CHECK_FILELOCK) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) (ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) dout("try_get_cap_refs %p error filelock\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) /* finish pending truncate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) while (ci->i_truncate_pending) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) if (snap_rwsem_locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) snap_rwsem_locked = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) __ceph_do_pending_vmtruncate(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) have = __ceph_caps_issued(ci, &implemented);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) if (have & need & CEPH_CAP_FILE_WR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) if (endoff >= 0 && endoff > (loff_t)ci->i_max_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) dout("get_cap_refs %p endoff %llu > maxsize %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) inode, endoff, ci->i_max_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) if (endoff > ci->i_requested_max_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) ret = ci->i_auth_cap ? -EFBIG : -ESTALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) * If a sync write is in progress, we must wait, so that we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) * can get a final snapshot value for size+mtime.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) if (__ceph_have_pending_cap_snap(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) dout("get_cap_refs %p cap_snap_pending\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) if ((have & need) == need) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) * Look at (implemented & ~have & not) so that we keep waiting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) * on transition from wanted -> needed caps. This is needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) * for WRBUFFER|WR -> WR, to keep a new WR sync write from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) * starting before a prior buffered writeback has completed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) int not = want & ~(have & need);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) int revoking = implemented & ~have;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) dout("get_cap_refs %p have %s but not %s (revoking %s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) inode, ceph_cap_string(have), ceph_cap_string(not),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) ceph_cap_string(revoking));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) if ((revoking & not) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) if (!snap_rwsem_locked &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) !ci->i_head_snapc &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) (need & CEPH_CAP_FILE_WR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) if (!down_read_trylock(&mdsc->snap_rwsem)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) * we cannot call down_read() when the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) * task isn't in the TASK_RUNNING state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) if (flags & NON_BLOCKING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) ret = -EAGAIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) down_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) snap_rwsem_locked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) snap_rwsem_locked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) if ((have & want) == want)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) *got = need | want;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) *got = need;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) if (S_ISREG(inode->i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) (need & CEPH_CAP_FILE_RD) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) !(*got & CEPH_CAP_FILE_CACHE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) ceph_disable_fscache_readpage(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) ceph_take_cap_refs(ci, *got, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) int session_readonly = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) int mds_wanted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) if (ci->i_auth_cap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) (need & (CEPH_CAP_FILE_WR | CEPH_CAP_FILE_EXCL))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) struct ceph_mds_session *s = ci->i_auth_cap->session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) spin_lock(&s->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) session_readonly = s->s_readonly;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) spin_unlock(&s->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) if (session_readonly) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) dout("get_cap_refs %p need %s but mds%d readonly\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) inode, ceph_cap_string(need), ci->i_auth_cap->mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) ret = -EROFS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) dout("get_cap_refs %p forced umount\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) ret = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) mds_wanted = __ceph_caps_mds_wanted(ci, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) if (need & ~mds_wanted) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) dout("get_cap_refs %p need %s > mds_wanted %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) inode, ceph_cap_string(need),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) ceph_cap_string(mds_wanted));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) ret = -ESTALE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) dout("get_cap_refs %p have %s need %s\n", inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) ceph_cap_string(have), ceph_cap_string(need));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) __ceph_touch_fmode(ci, mdsc, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) if (snap_rwsem_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) ceph_update_cap_mis(&mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) else if (ret == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) ceph_update_cap_hit(&mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) dout("get_cap_refs %p ret %d got %s\n", inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) ret, ceph_cap_string(*got));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) * Check the offset we are writing up to against our current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) * max_size. If necessary, tell the MDS we want to write to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) * a larger offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) static void check_max_size(struct inode *inode, loff_t endoff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) int check = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) /* do we need to explicitly request a larger max_size? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) if (endoff >= ci->i_max_size && endoff > ci->i_wanted_max_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) dout("write %p at large endoff %llu, req max_size\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) inode, endoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) ci->i_wanted_max_size = endoff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) /* duplicate ceph_check_caps()'s logic */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) if (ci->i_auth_cap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) (ci->i_auth_cap->issued & CEPH_CAP_FILE_WR) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) ci->i_wanted_max_size > ci->i_max_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) ci->i_wanted_max_size > ci->i_requested_max_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) check = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) if (check)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) ceph_check_caps(ci, CHECK_CAPS_AUTHONLY, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822)
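/*
 * Map the FILE_RD/FILE_WR cap bits onto the corresponding
 * CEPH_FILE_MODE_* bits, so the file modes implied by a cap request
 * can be accounted via the fmode helpers.
 */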
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) static inline int get_used_fmode(int caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) int fmode = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) if (caps & CEPH_CAP_FILE_RD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) fmode |= CEPH_FILE_MODE_RD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) if (caps & CEPH_CAP_FILE_WR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) fmode |= CEPH_FILE_MODE_WR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) return fmode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832)
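/*
 * Opportunistic variant of ceph_get_caps(): try to take cap references
 * right now, optionally without blocking on snap_rwsem, and report the
 * three "try again" errors from try_get_cap_refs() as 0 rather than
 * waiting.
 */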
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) int ceph_try_get_caps(struct inode *inode, int need, int want,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) bool nonblock, int *got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) int ret, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) BUG_ON(need & ~CEPH_CAP_FILE_RD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) BUG_ON(want & ~(CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) CEPH_CAP_ANY_DIR_OPS));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) if (need) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) ret = ceph_pool_perm_check(inode, need);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) flags = get_used_fmode(need | want);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) if (nonblock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) flags |= NON_BLOCKING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) ret = try_get_cap_refs(inode, need, want, 0, flags, got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) /* three special error codes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) * Wait for caps, and take cap references. If we can't get a WR cap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) * due to a small max_size, make sure we check_max_size (and possibly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) * ask the mds) so we don't get hung up indefinitely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) int ceph_get_caps(struct file *filp, int need, int want,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) loff_t endoff, int *got, struct page **pinned_page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) struct ceph_file_info *fi = filp->private_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) struct inode *inode = file_inode(filp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) int ret, _got, flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) ret = ceph_pool_perm_check(inode, need);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) if ((fi->fmode & CEPH_FILE_MODE_WR) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) fi->filp_gen != READ_ONCE(fsc->filp_gen))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) flags = get_used_fmode(need | want);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) while (true) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) flags &= CEPH_FILE_MODE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) if (atomic_read(&fi->num_locks))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) flags |= CHECK_FILELOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) _got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) ret = try_get_cap_refs(inode, need, want, endoff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) flags, &_got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) WARN_ON_ONCE(ret == -EAGAIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) if (!ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) struct ceph_mds_client *mdsc = fsc->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) struct cap_wait cw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) DEFINE_WAIT_FUNC(wait, woken_wake_function);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) cw.ino = ceph_ino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) cw.tgid = current->tgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) cw.need = need;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) cw.want = want;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) spin_lock(&mdsc->caps_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) list_add(&cw.list, &mdsc->cap_wait_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) spin_unlock(&mdsc->caps_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) /* make sure the used fmode doesn't time out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) ceph_get_fmode(ci, flags, FMODE_WAIT_BIAS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) add_wait_queue(&ci->i_cap_wq, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908)
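/*
 * Once we are on the wait queue, retry in non-blocking mode:
 * wait_woken() does the sleeping, so try_get_cap_refs() must not
 * block (see the TASK_RUNNING comment in try_get_cap_refs()).
 */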
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) flags |= NON_BLOCKING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) while (!(ret = try_get_cap_refs(inode, need, want,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) endoff, flags, &_got))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) if (signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) ret = -ERESTARTSYS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) remove_wait_queue(&ci->i_cap_wq, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) ceph_put_fmode(ci, flags, FMODE_WAIT_BIAS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) spin_lock(&mdsc->caps_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) list_del(&cw.list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) spin_unlock(&mdsc->caps_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) if (ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) if ((fi->fmode & CEPH_FILE_MODE_WR) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) fi->filp_gen != READ_ONCE(fsc->filp_gen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) if (ret >= 0 && _got)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) ceph_put_cap_refs(ci, _got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) return -EBADF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) if (ret == -EFBIG || ret == -ESTALE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) int ret2 = ceph_wait_on_async_create(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) if (ret2 < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) return ret2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) if (ret == -EFBIG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) check_max_size(inode, endoff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) if (ret == -ESTALE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) /* session was killed, try to renew caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) ret = ceph_renew_caps(inode, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) if (ret == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) if (S_ISREG(ci->vfs_inode.i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) ci->i_inline_version != CEPH_INLINE_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) (_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) i_size_read(inode) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) struct page *page =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) find_get_page(inode->i_mapping, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) if (page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) if (PageUptodate(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) *pinned_page = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) * drop cap refs first because doing a getattr while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) * holding cap refs can cause a deadlock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) ceph_put_cap_refs(ci, _got);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) _got = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) * getattr request will bring inline data into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) * page cache
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) ret = __ceph_do_getattr(inode, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) CEPH_STAT_CAP_INLINE_DATA,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) if (S_ISREG(ci->vfs_inode.i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) (_got & CEPH_CAP_FILE_RD) && (_got & CEPH_CAP_FILE_CACHE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) ceph_fscache_revalidate_cookie(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) *got = _got;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) * Take cap refs. Caller must already know we hold at least one ref
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) * on the caps in question or we don't know this is safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) void ceph_get_cap_refs(struct ceph_inode_info *ci, int caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) ceph_take_cap_refs(ci, caps, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) * Drop a cap_snap that is not associated with any snapshot.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) * We don't need to send a FLUSHSNAP message for it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) static int ceph_try_drop_cap_snap(struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) struct ceph_cap_snap *capsnap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) if (!capsnap->need_flush &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) !capsnap->writing && !capsnap->dirty_pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) dout("dropping cap_snap %p follows %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) capsnap, capsnap->follows);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) BUG_ON(capsnap->cap_flush.tid > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) ceph_put_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) if (!list_is_last(&capsnap->ci_item, &ci->i_cap_snaps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) list_del(&capsnap->ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) ceph_put_cap_snap(capsnap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) * Release cap refs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) * If we released the last ref on any given cap, call ceph_check_caps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) * to release (or schedule a release).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) * If we are releasing a WR cap (from a sync write), finalize any affected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) * cap_snap, and wake up any waiters.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) bool skip_checking_caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) int last = 0, put = 0, flushsnaps = 0, wake = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) if (had & CEPH_CAP_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) --ci->i_pin_ref;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) if (had & CEPH_CAP_FILE_RD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) if (--ci->i_rd_ref == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) last++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) if (had & CEPH_CAP_FILE_CACHE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) if (--ci->i_rdcache_ref == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) last++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) if (had & CEPH_CAP_FILE_EXCL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) if (--ci->i_fx_ref == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) last++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) if (had & CEPH_CAP_FILE_BUFFER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) if (--ci->i_wb_ref == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) last++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) put++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) dout("put_cap_refs %p wb %d -> %d (?)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) inode, ci->i_wb_ref+1, ci->i_wb_ref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) }
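/*
 * Dropping the last FILE_WR ref finalizes any pending cap_snap (the
 * sync write keeping it open has finished) and, once nothing dirty
 * remains, releases the head snap context taken in
 * ceph_take_cap_refs().
 */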
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) if (had & CEPH_CAP_FILE_WR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) if (--ci->i_wr_ref == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) last++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) if (__ceph_have_pending_cap_snap(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) struct ceph_cap_snap *capsnap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) list_last_entry(&ci->i_cap_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) struct ceph_cap_snap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) capsnap->writing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) if (ceph_try_drop_cap_snap(ci, capsnap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) put++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) else if (__ceph_finish_cap_snap(ci, capsnap))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) flushsnaps = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) wake = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) if (ci->i_wrbuffer_ref_head == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) ci->i_dirty_caps == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) ci->i_flushing_caps == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) BUG_ON(!ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) ceph_put_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) ci->i_head_snapc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) /* see comment in __ceph_remove_cap() */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) if (!__ceph_is_any_real_caps(ci) && ci->i_snap_realm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) drop_inode_snap_realm(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) dout("put_cap_refs %p had %s%s%s\n", inode, ceph_cap_string(had),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) last ? " last" : "", put ? " put" : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) if (!skip_checking_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) if (last)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) ceph_check_caps(ci, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) else if (flushsnaps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) ceph_flush_snaps(ci, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) if (wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) while (put-- > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) void ceph_put_cap_refs(struct ceph_inode_info *ci, int had)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) __ceph_put_cap_refs(ci, had, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) void ceph_put_cap_refs_no_check_caps(struct ceph_inode_info *ci, int had)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) __ceph_put_cap_refs(ci, had, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) * Release @nr WRBUFFER refs on dirty pages for the given @snapc snap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) * context. Adjust per-snap dirty page accounting as appropriate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) * Once all dirty data for a cap_snap is flushed, flush snapped file
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) * metadata back to the MDS. If we dropped the last ref, call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) * ceph_check_caps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) struct ceph_snap_context *snapc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) struct inode *inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) struct ceph_cap_snap *capsnap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) int put = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) bool last = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) bool found = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) bool flush_snaps = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) bool complete_capsnap = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) ci->i_wrbuffer_ref -= nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) if (ci->i_wrbuffer_ref == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) last = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) put++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) if (ci->i_head_snapc == snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) ci->i_wrbuffer_ref_head -= nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) if (ci->i_wrbuffer_ref_head == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) ci->i_wr_ref == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) ci->i_dirty_caps == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) ci->i_flushing_caps == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) BUG_ON(!ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) ceph_put_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) ci->i_head_snapc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) dout("put_wrbuffer_cap_refs on %p head %d/%d -> %d/%d %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) ci->i_wrbuffer_ref+nr, ci->i_wrbuffer_ref_head+nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) ci->i_wrbuffer_ref, ci->i_wrbuffer_ref_head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) last ? " LAST" : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) } else {
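/*
 * These pages belonged to an older snap context: find the matching
 * cap_snap and adjust its dirty page count instead.
 */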
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) if (capsnap->context == snapc) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) found = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) if (!found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) /*
			 * The capsnap should already have been removed when
			 * removing the auth cap in the case of a forced unmount.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) WARN_ON_ONCE(ci->i_auth_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) goto unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177)
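		/*
		 * This write was against an older snap context: account the
		 * pages to its cap_snap. Once the last dirty page is gone,
		 * either drop the cap_snap outright or flag the inode so the
		 * snapped metadata gets flushed to the MDS.
		 */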
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) capsnap->dirty_pages -= nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) if (capsnap->dirty_pages == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) complete_capsnap = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) if (!capsnap->writing) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) if (ceph_try_drop_cap_snap(ci, capsnap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) put++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) ci->i_ceph_flags |= CEPH_I_FLUSH_SNAPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) flush_snaps = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) }
		dout("put_wrbuffer_cap_refs on %p cap_snap %p "
		     "snap %lld %d/%d -> %d/%d %s%s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) inode, capsnap, capsnap->context->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) ci->i_wrbuffer_ref+nr, capsnap->dirty_pages + nr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) ci->i_wrbuffer_ref, capsnap->dirty_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) last ? " (wrbuffer last)" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) complete_capsnap ? " (complete capsnap)" : "");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) if (last) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) ceph_check_caps(ci, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) } else if (flush_snaps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) ceph_flush_snaps(ci, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) if (complete_capsnap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) while (put-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) /* avoid calling iput_final() in osd dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) * Invalidate unlinked inode's aliases, so we can drop the inode ASAP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) static void invalidate_aliases(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) struct dentry *dn, *prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) dout("invalidate_aliases inode %p\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) d_prune_aliases(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) /*
	 * For a non-directory inode, d_find_alias() only returns a
	 * hashed dentry; after calling d_invalidate(), the dentry
	 * becomes unhashed.
	 *
	 * For a directory inode, d_find_alias() can also return an
	 * unhashed dentry, but a directory inode should have at most
	 * one alias.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) while ((dn = d_find_alias(inode))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) if (dn == prev) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) dput(dn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) d_invalidate(dn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) if (prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) dput(prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) prev = dn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) if (prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) dput(prev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) struct cap_extra_info {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) struct ceph_string *pool_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) /* inline data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) u64 inline_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) void *inline_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) u32 inline_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) /* dirstat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) bool dirstat_valid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) u64 nfiles;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) u64 nsubdirs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) u64 change_attr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) /* currently issued */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) int issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) struct timespec64 btime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) * Handle a cap GRANT message from the MDS. (Note that a GRANT may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) * actually be a revocation if it specifies a smaller cap set.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) * caller holds s_mutex and i_ceph_lock, we drop both.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) static void handle_cap_grant(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) struct ceph_cap *cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) struct ceph_mds_caps *grant,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) struct ceph_buffer *xattr_buf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) struct cap_extra_info *extra_info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) __releases(ci->i_ceph_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) __releases(session->s_mdsc->snap_rwsem)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) int seq = le32_to_cpu(grant->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) int newcaps = le32_to_cpu(grant->caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) int used, wanted, dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) u64 size = le64_to_cpu(grant->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) u64 max_size = le64_to_cpu(grant->max_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) unsigned char check_caps = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) bool was_stale = cap->cap_gen < session->s_cap_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) bool wake = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) bool writeback = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) bool queue_trunc = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) bool queue_invalidate = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) bool deleted_inode = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) bool fill_inline = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) inode->i_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) * If CACHE is being revoked, and we have no dirty buffers,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) * try to invalidate (once). (If there are dirty buffers, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) * will invalidate _after_ writeback.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) if (S_ISREG(inode->i_mode) && /* don't invalidate readdir cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) ((cap->issued & ~newcaps) & CEPH_CAP_FILE_CACHE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) (newcaps & CEPH_CAP_FILE_LAZYIO) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) !(ci->i_wrbuffer_ref || ci->i_wb_ref)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) if (try_nonblocking_invalidate(inode)) {
			/*
			 * There were locked pages; invalidate later in a
			 * separate thread.
			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) queue_invalidate = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) ci->i_rdcache_revoking = ci->i_rdcache_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317)
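	/*
	 * A cap issued under an older session generation is stale: its
	 * issued/implemented bits can no longer be trusted, so reset it to
	 * the bare PIN cap before applying this grant.
	 */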
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) if (was_stale)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) cap->issued = cap->implemented = CEPH_CAP_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) /*
	 * The auth MDS of the inode changed. We received the cap export
	 * message, but still haven't received the cap import message.
	 * handle_cap_export() already updated the new auth MDS' cap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) * "ceph_seq_cmp(seq, cap->seq) <= 0" means we are processing a message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) * that was sent before the cap import message. So don't remove caps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) if (ceph_seq_cmp(seq, cap->seq) <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) WARN_ON(cap != ci->i_auth_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) WARN_ON(cap->cap_id != le64_to_cpu(grant->cap_id));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) seq = cap->seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) newcaps |= cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) /* side effects now are allowed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) cap->cap_gen = session->s_cap_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) cap->seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) __check_cap_issue(ci, cap, newcaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) inode_set_max_iversion_raw(inode, extra_info->change_attr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) if ((newcaps & CEPH_CAP_AUTH_SHARED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) (extra_info->issued & CEPH_CAP_AUTH_EXCL) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) inode->i_mode = le32_to_cpu(grant->mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) ci->i_btime = extra_info->btime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) dout("%p mode 0%o uid.gid %d.%d\n", inode, inode->i_mode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) from_kuid(&init_user_ns, inode->i_uid),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) from_kgid(&init_user_ns, inode->i_gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) if ((newcaps & CEPH_CAP_LINK_SHARED) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) (extra_info->issued & CEPH_CAP_LINK_EXCL) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) set_nlink(inode, le32_to_cpu(grant->nlink));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) if (inode->i_nlink == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) deleted_inode = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) if ((extra_info->issued & CEPH_CAP_XATTR_EXCL) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) grant->xattr_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) int len = le32_to_cpu(grant->xattr_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) u64 version = le64_to_cpu(grant->xattr_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) if (version > ci->i_xattrs.version) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) dout(" got new xattrs v%llu on %p len %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) version, inode, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) if (ci->i_xattrs.blob)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) ceph_buffer_put(ci->i_xattrs.blob);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) ci->i_xattrs.blob = ceph_buffer_get(xattr_buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) ci->i_xattrs.version = version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) ceph_forget_all_cached_acls(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) ceph_security_invalidate_secctx(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) if (newcaps & CEPH_CAP_ANY_RD) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) struct timespec64 mtime, atime, ctime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) /* ctime/mtime/atime? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) ceph_decode_timespec64(&mtime, &grant->mtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) ceph_decode_timespec64(&atime, &grant->atime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) ceph_decode_timespec64(&ctime, &grant->ctime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) ceph_fill_file_time(inode, extra_info->issued,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) le32_to_cpu(grant->time_warp_seq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) &ctime, &mtime, &atime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) if ((newcaps & CEPH_CAP_FILE_SHARED) && extra_info->dirstat_valid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) ci->i_files = extra_info->nfiles;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) ci->i_subdirs = extra_info->nsubdirs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) /* file layout may have changed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) s64 old_pool = ci->i_layout.pool_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) struct ceph_string *old_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) ceph_file_layout_from_legacy(&ci->i_layout, &grant->layout);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) old_ns = rcu_dereference_protected(ci->i_layout.pool_ns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) lockdep_is_held(&ci->i_ceph_lock));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) rcu_assign_pointer(ci->i_layout.pool_ns, extra_info->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) if (ci->i_layout.pool_id != old_pool ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) extra_info->pool_ns != old_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) ci->i_ceph_flags &= ~CEPH_I_POOL_PERM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) extra_info->pool_ns = old_ns;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) /* size/truncate_seq? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) queue_trunc = ceph_fill_file_size(inode, extra_info->issued,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) le32_to_cpu(grant->truncate_seq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) le64_to_cpu(grant->truncate_size),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) if (ci->i_auth_cap == cap && (newcaps & CEPH_CAP_ANY_FILE_WR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) if (max_size != ci->i_max_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) dout("max_size %lld -> %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) ci->i_max_size, max_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) ci->i_max_size = max_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) if (max_size >= ci->i_wanted_max_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) ci->i_wanted_max_size = 0; /* reset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) ci->i_requested_max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) /* check cap bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) wanted = __ceph_caps_wanted(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) used = __ceph_caps_used(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) dirty = __ceph_caps_dirty(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) dout(" my wanted = %s, used = %s, dirty %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) ceph_cap_string(wanted),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) ceph_cap_string(used),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) ceph_cap_string(dirty));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) if ((was_stale || le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) (wanted & ~(cap->mds_wanted | newcaps))) {
		/*
		 * If the MDS is importing the cap, prior cap messages that
		 * update 'wanted' may get dropped by the MDS (migrate seq
		 * mismatch).
		 *
		 * We don't send a cap message to update 'wanted' if what we
		 * want is already issued. If the MDS revokes caps, the cap
		 * message that releases them also tells the MDS what we want.
		 * But if caps were revoked forcibly by the MDS (stale
		 * session), we may never have told the MDS what we want.
		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) check_caps = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) /* revocation, grant, or no-op? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) if (cap->issued & ~newcaps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) int revoking = cap->issued & ~newcaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) dout("revocation: %s -> %s (revoking %s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) ceph_cap_string(cap->issued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) ceph_cap_string(newcaps),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) ceph_cap_string(revoking));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) if (S_ISREG(inode->i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) (revoking & used & CEPH_CAP_FILE_BUFFER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) writeback = true; /* initiate writeback; will delay ack */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) else if (queue_invalidate &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) revoking == CEPH_CAP_FILE_CACHE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) ; /* do nothing yet, invalidation will be queued */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) else if (cap == ci->i_auth_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) check_caps = 1; /* check auth cap only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) check_caps = 2; /* check all caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) cap->issued = newcaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) cap->implemented |= newcaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) } else if (cap->issued == newcaps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) dout("caps unchanged: %s -> %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) ceph_cap_string(cap->issued), ceph_cap_string(newcaps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) dout("grant: %s -> %s\n", ceph_cap_string(cap->issued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) ceph_cap_string(newcaps));
		/* non-auth MDS is revoking the newly granted caps? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) if (cap == ci->i_auth_cap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) __ceph_caps_revoking_other(ci, cap, newcaps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) check_caps = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) cap->issued = newcaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) cap->implemented |= newcaps; /* add bits only, to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) * avoid stepping on a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) * pending revocation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) BUG_ON(cap->issued & ~cap->implemented);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) if (extra_info->inline_version > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) extra_info->inline_version >= ci->i_inline_version) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) ci->i_inline_version = extra_info->inline_version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) if (ci->i_inline_version != CEPH_INLINE_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) (newcaps & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) fill_inline = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) if (ci->i_auth_cap == cap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) if (newcaps & ~extra_info->issued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) if (ci->i_requested_max_size > max_size ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) !(le32_to_cpu(grant->wanted) & CEPH_CAP_ANY_FILE_WR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) /* re-request max_size if necessary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) ci->i_requested_max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) ceph_kick_flushing_inode_caps(session, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) up_read(&session->s_mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) if (fill_inline)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) ceph_fill_inline_data(inode, NULL, extra_info->inline_data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) extra_info->inline_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) if (queue_trunc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) ceph_queue_vmtruncate(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) if (writeback)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) * queue inode for writeback: we can't actually call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) * filemap_write_and_wait, etc. from message handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) * context.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) ceph_queue_writeback(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) if (queue_invalidate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) ceph_queue_invalidate(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) if (deleted_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) invalidate_aliases(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) if (wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543)
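	/*
	 * check_caps == 1: only the auth cap needs re-checking;
	 * check_caps == 2: re-check all caps. In the no-op case there is
	 * nothing left to do, so just release the session mutex here.
	 */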
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) if (check_caps == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) else if (check_caps == 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) ceph_check_caps(ci, CHECK_CAPS_NOINVAL, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) * Handle FLUSH_ACK from MDS, indicating that metadata we sent to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) * MDS has been safely committed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) static void handle_cap_flush_ack(struct inode *inode, u64 flush_tid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) struct ceph_mds_caps *m,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) struct ceph_cap *cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) __releases(ci->i_ceph_lock)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) struct ceph_cap_flush *cf, *tmp_cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) LIST_HEAD(to_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) unsigned seq = le32_to_cpu(m->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) int dirty = le32_to_cpu(m->dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) int cleaned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) bool drop = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) bool wake_ci = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) bool wake_mdsc = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573)
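	/*
	 * Walk the inode's flush list: every non-capsnap flush with a tid
	 * at or before flush_tid is covered by this ack and is moved to
	 * to_remove; caps that a later flush still holds dirty are masked
	 * out of 'cleaned'.
	 */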
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) list_for_each_entry_safe(cf, tmp_cf, &ci->i_cap_flush_list, i_list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) /* Is this the one that was flushed? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) if (cf->tid == flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) cleaned = cf->caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) /* Is this a capsnap? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) if (cf->is_capsnap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) if (cf->tid <= flush_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585) * An earlier or current tid. The FLUSH_ACK should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) * represent a superset of this flush's caps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588) wake_ci |= __detach_cap_flush_from_ci(ci, cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) list_add_tail(&cf->i_list, &to_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) * This is a later one. Any caps in it are still dirty
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593) * so don't count them as cleaned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) cleaned &= ~cf->caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) if (!cleaned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) dout("handle_cap_flush_ack inode %p mds%d seq %d on %s cleaned %s,"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602) " flushing %s -> %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) inode, session->s_mds, seq, ceph_cap_string(dirty),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) ceph_cap_string(cleaned), ceph_cap_string(ci->i_flushing_caps),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) ceph_cap_string(ci->i_flushing_caps & ~cleaned));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) if (list_empty(&to_remove) && !cleaned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) ci->i_flushing_caps &= ~cleaned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614) list_for_each_entry(cf, &to_remove, i_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) wake_mdsc |= __detach_cap_flush_from_mdsc(mdsc, cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) if (ci->i_flushing_caps == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618) if (list_empty(&ci->i_cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) list_del_init(&ci->i_flushing_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) if (!list_empty(&session->s_cap_flushing)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) dout(" mds%d still flushing cap on %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) session->s_mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) &list_first_entry(&session->s_cap_flushing,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) struct ceph_inode_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) i_flushing_item)->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) mdsc->num_cap_flushing--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629) dout(" inode %p now !flushing\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) if (ci->i_dirty_caps == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632) dout(" inode %p now clean\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) BUG_ON(!list_empty(&ci->i_dirty_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) drop = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635) if (ci->i_wr_ref == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) ci->i_wrbuffer_ref_head == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) BUG_ON(!ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638) ceph_put_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) ci->i_head_snapc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) BUG_ON(list_empty(&ci->i_dirty_item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) while (!list_empty(&to_remove)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) cf = list_first_entry(&to_remove,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) struct ceph_cap_flush, i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) list_del_init(&cf->i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654) if (!cf->is_capsnap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) ceph_free_cap_flush(cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) if (wake_ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) if (wake_mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661) wake_up_all(&mdsc->cap_flushing_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) if (drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) void __ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) bool *wake_ci, bool *wake_mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) bool ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) dout("removing capsnap %p, inode %p ci %p\n", capsnap, inode, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) list_del_init(&capsnap->ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) ret = __detach_cap_flush_from_ci(ci, &capsnap->cap_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) if (wake_ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) *wake_ci = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) if (list_empty(&ci->i_cap_flush_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) list_del_init(&ci->i_flushing_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) ret = __detach_cap_flush_from_mdsc(mdsc, &capsnap->cap_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) if (wake_mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) *wake_mdsc = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) void ceph_remove_capsnap(struct inode *inode, struct ceph_cap_snap *capsnap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) bool *wake_ci, bool *wake_mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) WARN_ON_ONCE(capsnap->dirty_pages || capsnap->writing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) __ceph_remove_capsnap(inode, capsnap, wake_ci, wake_mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) * Handle FLUSHSNAP_ACK. MDS has flushed snap data to disk and we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) * throw away our cap_snap.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706) *
 * Caller holds s_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) static void handle_cap_flushsnap_ack(struct inode *inode, u64 flush_tid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) struct ceph_mds_caps *m,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) u64 follows = le64_to_cpu(m->snap_follows);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) struct ceph_cap_snap *capsnap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) bool flushed = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) bool wake_ci = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) bool wake_mdsc = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721) dout("handle_cap_flushsnap_ack inode %p ci %p mds%d follows %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) inode, ci, session->s_mds, follows);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) list_for_each_entry(capsnap, &ci->i_cap_snaps, ci_item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) if (capsnap->follows == follows) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727) if (capsnap->cap_flush.tid != flush_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) dout(" cap_snap %p follows %lld tid %lld !="
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) " %lld\n", capsnap, follows,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) flush_tid, capsnap->cap_flush.tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) flushed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736) dout(" skipping cap_snap %p follows %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) capsnap, capsnap->follows);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) if (flushed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) ceph_remove_capsnap(inode, capsnap, &wake_ci, &wake_mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743)
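	/*
	 * Drop the references pinned by the now-completed snap flush (the
	 * snap context, the cap_snap itself, and the inode) outside of
	 * i_ceph_lock.
	 */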
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) if (flushed) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) ceph_put_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) ceph_put_cap_snap(capsnap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) if (wake_ci)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) if (wake_mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) wake_up_all(&mdsc->cap_flushing_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) * Handle TRUNC from MDS, indicating file truncation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) *
 * caller holds s_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) static bool handle_cap_trunc(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) struct ceph_mds_caps *trunc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765) int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) int seq = le32_to_cpu(trunc->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) u32 truncate_seq = le32_to_cpu(trunc->truncate_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) u64 truncate_size = le64_to_cpu(trunc->truncate_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) u64 size = le64_to_cpu(trunc->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) int implemented = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771) int dirty = __ceph_caps_dirty(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) int issued = __ceph_caps_issued(ceph_inode(inode), &implemented);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) bool queue_trunc = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776)
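	/*
	 * Fold implemented and dirty bits into 'issued' so that
	 * ceph_fill_file_size() sees everything we currently hold when
	 * deciding whether to apply the new size and truncate_seq.
	 */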
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) issued |= implemented | dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) dout("handle_cap_trunc inode %p mds%d seq %d to %lld seq %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) inode, mds, seq, truncate_size, truncate_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) queue_trunc = ceph_fill_file_size(inode, issued,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) truncate_seq, truncate_size, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) return queue_trunc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) * Handle EXPORT from MDS. Cap is being migrated _from_ this mds to a
 * different one. If this is the most recent migration we've seen (as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) * indicated by mseq), make note of the migrating cap bits for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) * duration (until we see the corresponding IMPORT).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) * caller holds s_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) struct ceph_mds_cap_peer *ph,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) struct ceph_mds_session *tsession = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) struct ceph_cap *cap, *tcap, *new_cap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) u64 t_cap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) unsigned mseq = le32_to_cpu(ex->migrate_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) unsigned t_seq, t_mseq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) int target, issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) if (ph) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) t_cap_id = le64_to_cpu(ph->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) t_seq = le32_to_cpu(ph->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) t_mseq = le32_to_cpu(ph->mseq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) target = le32_to_cpu(ph->mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814) t_cap_id = t_seq = t_mseq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) target = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) dout("handle_cap_export inode %p ci %p mds%d mseq %d target %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819) inode, ci, mds, mseq, target);
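	/*
	 * We may have to drop all locks below to open the export target's
	 * session, in which case we come back here and re-validate the cap
	 * under i_ceph_lock.
	 */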
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) cap = __get_cap_for_mds(ci, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) if (target < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) __ceph_remove_cap(cap, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832) * now we know we haven't received the cap import message yet
	 * because the exported cap still exists.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) issued = cap->issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) if (issued != cap->implemented)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) pr_err_ratelimited("handle_cap_export: issued != implemented: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) "ino (%llx.%llx) mds%d seq %d mseq %d "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) "issued %s implemented %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) ceph_vinop(inode), mds, cap->seq, cap->mseq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) ceph_cap_string(issued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) ceph_cap_string(cap->implemented));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) tcap = __get_cap_for_mds(ci, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) if (tcap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) /* already have caps from the target */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) if (tcap->cap_id == t_cap_id &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) ceph_seq_cmp(tcap->seq, t_seq) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) dout(" updating import cap %p mds%d\n", tcap, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) tcap->cap_id = t_cap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853) tcap->seq = t_seq - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) tcap->issue_seq = t_seq - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) tcap->issued |= issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) tcap->implemented |= issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) if (cap == ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858) ci->i_auth_cap = tcap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) change_auth_cap_ses(ci, tcap->session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) __ceph_remove_cap(cap, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) } else if (tsession) {
		/* add placeholder for the export target */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) tcap = new_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) ceph_add_cap(inode, tsession, t_cap_id, issued, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) if (!list_empty(&ci->i_cap_flush_list) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872) ci->i_auth_cap == tcap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) list_move_tail(&ci->i_flushing_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) &tcap->session->s_cap_flushing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) __ceph_remove_cap(cap, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) /* open target session */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) tsession = ceph_mdsc_open_export_target_session(mdsc, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888) if (!IS_ERR(tsession)) {
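		/*
		 * Take the two session mutexes in a fixed order (higher mds
		 * rank first) so that concurrent cap migrations cannot
		 * deadlock on them.
		 */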
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) if (mds > target) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) mutex_lock_nested(&tsession->s_mutex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892) SINGLE_DEPTH_NESTING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) mutex_lock(&tsession->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) mutex_lock_nested(&session->s_mutex,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) SINGLE_DEPTH_NESTING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) new_cap = ceph_get_cap(mdsc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) tsession = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) target = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) if (tsession) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) mutex_unlock(&tsession->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) ceph_put_mds_session(tsession);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914) if (new_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) ceph_put_cap(mdsc, new_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) * Handle cap IMPORT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) *
 * caller holds s_mutex and i_ceph_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) static void handle_cap_import(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) struct inode *inode, struct ceph_mds_caps *im,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) struct ceph_mds_cap_peer *ph,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) struct ceph_cap **target_cap, int *old_issued)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) struct ceph_cap *cap, *ocap, *new_cap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) int issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) unsigned caps = le32_to_cpu(im->caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) unsigned wanted = le32_to_cpu(im->wanted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) unsigned seq = le32_to_cpu(im->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) unsigned mseq = le32_to_cpu(im->migrate_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) u64 realmino = le64_to_cpu(im->realm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) u64 cap_id = le64_to_cpu(im->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) u64 p_cap_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) int peer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) if (ph) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) p_cap_id = le64_to_cpu(ph->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) peer = le32_to_cpu(ph->mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) p_cap_id = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) peer = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) inode, ci, mds, mseq, peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) cap = __get_cap_for_mds(ci, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) if (!cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) if (!new_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) new_cap = ceph_get_cap(mdsc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) cap = new_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) if (new_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) ceph_put_cap(mdsc, new_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) new_cap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968)
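/*
 * Note what was already issued (plus anything dirty) before installing the
 * imported cap, so the caller can tell which bits are newly granted.
 */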
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) __ceph_caps_issued(ci, &issued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) issued |= __ceph_caps_dirty(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) ceph_add_cap(inode, session, cap_id, caps, wanted, seq, mseq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) realmino, CEPH_CAP_FLAG_AUTH, &new_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974)
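/*
 * If the exporting MDS's cap is still present and matches the peer cap id,
 * it can be removed now that the import has been installed.
 */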
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) if (ocap && ocap->cap_id == p_cap_id) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) dout(" remove export cap %p mds%d flags %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) ocap, peer, ph->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) if ((ph->flags & CEPH_CAP_FLAG_AUTH) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) (ocap->seq != le32_to_cpu(ph->seq) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) ocap->mseq != le32_to_cpu(ph->mseq))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) pr_err_ratelimited("handle_cap_import: "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) "mismatched seq/mseq: ino (%llx.%llx) "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) "mds%d seq %d mseq %d importer mds%d "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) "has peer seq %d mseq %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) ceph_vinop(inode), peer, ocap->seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) ocap->mseq, mds, le32_to_cpu(ph->seq),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) le32_to_cpu(ph->mseq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) *old_issued = issued;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) *target_cap = cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) * Handle a caps message from the MDS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) * Identify the appropriate session and inode, and call the right handler
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) * based on the cap op.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) void ceph_handle_caps(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) struct ceph_mds_client *mdsc = session->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) struct ceph_mds_caps *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) struct ceph_mds_cap_peer *peer = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) struct ceph_snap_realm *realm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) int op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) int msg_version = le16_to_cpu(msg->hdr.version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) u32 seq, mseq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) struct ceph_vino vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) void *snaptrace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) size_t snaptrace_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) struct cap_extra_info extra_info = {};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) bool queue_trunc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) dout("handle_caps from mds%d\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) /* decode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) end = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) if (msg->front.iov_len < sizeof(*h))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) h = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) op = le32_to_cpu(h->op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) vino.ino = le64_to_cpu(h->ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) vino.snap = CEPH_NOSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) seq = le32_to_cpu(h->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) mseq = le32_to_cpu(h->migrate_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) snaptrace = h + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) snaptrace_len = le32_to_cpu(h->snap_trace_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) p = snaptrace + snaptrace_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039)
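/* newer message versions append optional fields; decode only what is present */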
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) if (msg_version >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) u32 flock_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) ceph_decode_32_safe(&p, end, flock_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) if (p + flock_len > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) p += flock_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) if (msg_version >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) if (op == CEPH_CAP_OP_IMPORT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) if (p + sizeof(*peer) > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) peer = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) p += sizeof(*peer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) } else if (op == CEPH_CAP_OP_EXPORT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) /* recorded in unused fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) peer = (void *)&h->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) if (msg_version >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) ceph_decode_64_safe(&p, end, extra_info.inline_version, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) ceph_decode_32_safe(&p, end, extra_info.inline_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) if (p + extra_info.inline_len > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) extra_info.inline_data = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) p += extra_info.inline_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) if (msg_version >= 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) u32 epoch_barrier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) ceph_decode_32_safe(&p, end, epoch_barrier, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) ceph_osdc_update_epoch_barrier(osdc, epoch_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) if (msg_version >= 8) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) u64 flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) u32 caller_uid, caller_gid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) u32 pool_ns_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) /* version >= 6 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) ceph_decode_64_safe(&p, end, flush_tid, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) /* version >= 7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) ceph_decode_32_safe(&p, end, caller_uid, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) ceph_decode_32_safe(&p, end, caller_gid, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) /* version >= 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) ceph_decode_32_safe(&p, end, pool_ns_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) if (pool_ns_len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) ceph_decode_need(&p, end, pool_ns_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) extra_info.pool_ns =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) ceph_find_or_create_string(p, pool_ns_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) p += pool_ns_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) if (msg_version >= 9) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) struct ceph_timespec *btime;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) if (p + sizeof(*btime) > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) btime = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) ceph_decode_timespec64(&extra_info.btime, btime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) p += sizeof(*btime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) ceph_decode_64_safe(&p, end, extra_info.change_attr, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) if (msg_version >= 11) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) u32 flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) /* version >= 10 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) ceph_decode_32_safe(&p, end, flags, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) /* version >= 11 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) extra_info.dirstat_valid = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) ceph_decode_64_safe(&p, end, extra_info.nsubdirs, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) /* lookup ino */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) inode = ceph_find_inode(mdsc->fsc->sb, vino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) dout(" op %s ino %llx.%llx inode %p\n", ceph_cap_op_name(op), vino.ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) vino.snap, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) inc_session_sequence(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) dout(" mds%d seq %lld cap seq %u\n", session->s_mds, session->s_seq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) (unsigned)seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128)
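/*
 * Without the inode there is nothing to apply the cap to.  For an IMPORT,
 * queue a release so the MDS can drop the state it just handed us.
 */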
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) if (!inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) dout(" i don't have ino %llx\n", vino.ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) if (op == CEPH_CAP_OP_IMPORT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) cap = ceph_get_cap(mdsc, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) cap->cap_ino = vino.ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) cap->queue_release = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) cap->cap_id = le64_to_cpu(h->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) cap->mseq = mseq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) cap->seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) cap->issue_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) __ceph_queue_cap_release(session, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) goto flush_cap_releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) /* these will work even if we don't have a cap yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) switch (op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) case CEPH_CAP_OP_FLUSHSNAP_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) handle_cap_flushsnap_ack(inode, le64_to_cpu(msg->hdr.tid),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) h, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) case CEPH_CAP_OP_EXPORT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) handle_cap_export(inode, h, peer, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) goto done_unlocked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) case CEPH_CAP_OP_IMPORT:
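/*
 * An import may carry a snap trace; if so, take snap_rwsem for write to
 * update the snap realm, then downgrade to read for the grant below.
 */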
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) realm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) if (snaptrace_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) down_write(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) ceph_update_snap_trace(mdsc, snaptrace,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) snaptrace + snaptrace_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) false, &realm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) downgrade_write(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) down_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) handle_cap_import(mdsc, inode, h, peer, session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) &cap, &extra_info.issued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) handle_cap_grant(inode, session, cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) h, msg->middle, &extra_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) if (realm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) ceph_put_snap_realm(mdsc, realm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) goto done_unlocked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) /* the rest require a cap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) cap = __get_cap_for_mds(ceph_inode(inode), session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) if (!cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) dout(" no cap on %p ino %llx.%llx from mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) inode, ceph_ino(inode), ceph_snap(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) goto flush_cap_releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) /* note that each of these drops i_ceph_lock for us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) switch (op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) case CEPH_CAP_OP_REVOKE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) case CEPH_CAP_OP_GRANT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) __ceph_caps_issued(ci, &extra_info.issued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) extra_info.issued |= __ceph_caps_dirty(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) handle_cap_grant(inode, session, cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) h, msg->middle, &extra_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) goto done_unlocked;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) case CEPH_CAP_OP_FLUSH_ACK:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) handle_cap_flush_ack(inode, le64_to_cpu(msg->hdr.tid),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) h, session, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) case CEPH_CAP_OP_TRUNC:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) queue_trunc = handle_cap_trunc(inode, h, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) if (queue_trunc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) ceph_queue_vmtruncate(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) pr_err("ceph_handle_caps: unknown cap op %d %s\n", op,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) ceph_cap_op_name(op));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) done_unlocked:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) ceph_put_string(extra_info.pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) /* avoid calling iput_final() in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) flush_cap_releases:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) * send any cap release message to try to move things
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) * along for the mds (who clearly thinks we still have this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) * cap).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) ceph_flush_cap_releases(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) pr_err("ceph_handle_caps: corrupt message\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) * Delayed work handler: walk the delayed cap release LRU list and process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) * entries whose delay has expired.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) * Caps added to the list while it is being processed are not handled in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) * run; instead, ci->i_hold_caps_max is returned so the work can be rescheduled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) struct ceph_mount_options *opt = mdsc->fsc->mount_options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) unsigned long loop_start = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) unsigned long delay = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) dout("check_delayed_caps\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) spin_lock(&mdsc->cap_delay_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) while (!list_empty(&mdsc->cap_delay_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) ci = list_first_entry(&mdsc->cap_delay_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) struct ceph_inode_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) i_cap_delay_list);
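/*
 * i_hold_caps_max - delay_max is roughly when this entry was queued; if
 * that is after this run started, the rest of the list is newer still,
 * so stop and return when the work should run again.
 */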
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) dout("%s caps added recently. Exiting loop\n", __func__);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) delay = ci->i_hold_caps_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) time_before(jiffies, ci->i_hold_caps_max))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) list_del_init(&ci->i_cap_delay_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) inode = igrab(&ci->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) if (inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) spin_unlock(&mdsc->cap_delay_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) dout("check_delayed_caps on %p\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) ceph_check_caps(ci, 0, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) /* avoid calling iput_final() in tick thread */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) spin_lock(&mdsc->cap_delay_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) spin_unlock(&mdsc->cap_delay_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) return delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) * Flush all dirty caps of the given session to the MDS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) static void flush_dirty_session_caps(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) dout("flush_dirty_caps\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) while (!list_empty(&s->s_cap_dirty)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) ci = list_first_entry(&s->s_cap_dirty, struct ceph_inode_info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) i_dirty_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) inode = &ci->vfs_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) dout("flush_dirty_caps %p\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) dout("flush_dirty_caps done\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) static void iterate_sessions(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) void (*cb)(struct ceph_mds_session *))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) int mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) for (mds = 0; mds < mdsc->max_sessions; ++mds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) struct ceph_mds_session *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) if (!mdsc->sessions[mds])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) s = ceph_get_mds_session(mdsc->sessions[mds]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329)
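/* drop mdsc->mutex across the callback; our reference keeps the session valid */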
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) cb(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) iterate_sessions(mdsc, flush_dirty_session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) void __ceph_touch_fmode(struct ceph_inode_info *ci,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) struct ceph_mds_client *mdsc, int fmode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) unsigned long now = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) if (fmode & CEPH_FILE_MODE_RD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) ci->i_last_rd = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) if (fmode & CEPH_FILE_MODE_WR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) ci->i_last_wr = now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) /* queue periodic check */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) if (fmode &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) __ceph_is_any_real_caps(ci) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) list_empty(&ci->i_cap_delay_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) __cap_delay_requeue(mdsc, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) int bits = (fmode << 1) | 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) bool already_opened = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) if (count == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) atomic64_inc(&mdsc->metric.opened_files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) spin_lock(&ci->i_ceph_lock);
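/*
 * Bit 0 of 'bits' is the implicit PIN reference taken for every open; the
 * higher bits map the fmode bits onto the per-mode i_nr_by_mode[] counters.
 */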
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) * If any mode refcount is larger than 0, the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) * inode has already been opened by someone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) * else.  Skip the PIN ref (index 0) here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) if (i && ci->i_nr_by_mode[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) already_opened = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) if (bits & (1 << i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) ci->i_nr_by_mode[i] += count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) if (!already_opened)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) percpu_counter_inc(&mdsc->metric.opened_inodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) * Drop an open file reference.  If this was the last open reference,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) * we may need to release capabilities to the MDS (or schedule their
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) * delayed release).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) int bits = (fmode << 1) | 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) bool is_closed = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) if (count == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) atomic64_dec(&mdsc->metric.opened_files);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) for (i = 0; i < CEPH_FILE_MODE_BITS; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) if (bits & (1 << i)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) BUG_ON(ci->i_nr_by_mode[i] < count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) ci->i_nr_by_mode[i] -= count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) * If any mode refcount is still non-zero after
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) * the decrement, the inode is still open
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) * elsewhere.  Skip the PIN ref (index 0) here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) if (i && ci->i_nr_by_mode[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) is_closed = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) if (is_closed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) percpu_counter_dec(&mdsc->metric.opened_inodes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) * For a soon-to-be unlinked file, drop the LINK caps. If it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) * looks like the link count will hit 0, drop any other caps (other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) * than PIN) we don't specifically want (due to the file still being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) * open).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) int ceph_drop_caps_for_unlink(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) int drop = CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) if (inode->i_nlink == 1) {
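/* last link: also drop everything we don't actively want, except PIN */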
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) drop |= ~(__ceph_caps_wanted(ci) | CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) if (__ceph_caps_dirty(ci)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) struct ceph_mds_client *mdsc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) ceph_inode_to_client(inode)->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) __cap_delay_requeue_front(mdsc, ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) return drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) * Helpers for embedding cap and dentry lease releases into mds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) * requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) * @force is used by dentry_release (below) to force inclusion of a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) * record for the directory inode, even when there aren't any caps to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) * drop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) int ceph_encode_inode_release(void **p, struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) int mds, int drop, int unless, int force)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) struct ceph_mds_request_release *rel = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) int used, dirty;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) used = __ceph_caps_used(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) dirty = __ceph_caps_dirty(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) dout("encode_inode_release %p mds%d used|dirty %s drop %s unless %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) inode, mds, ceph_cap_string(used|dirty), ceph_cap_string(drop),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) ceph_cap_string(unless));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) /* only drop unused, clean caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) drop &= ~(used | dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) cap = __get_cap_for_mds(ci, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) if (cap && __cap_is_valid(cap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) unless &= cap->issued;
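/*
 * If an 'unless' EXCL cap is actually issued, don't drop the
 * corresponding SHARED cap.
 */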
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) if (unless) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) if (unless & CEPH_CAP_AUTH_EXCL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) drop &= ~CEPH_CAP_AUTH_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) if (unless & CEPH_CAP_LINK_EXCL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) drop &= ~CEPH_CAP_LINK_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) if (unless & CEPH_CAP_XATTR_EXCL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) drop &= ~CEPH_CAP_XATTR_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) if (unless & CEPH_CAP_FILE_EXCL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) drop &= ~CEPH_CAP_FILE_SHARED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) if (force || (cap->issued & drop)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) if (cap->issued & drop) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) int wanted = __ceph_caps_wanted(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) dout("encode_inode_release %p cap %p "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) "%s -> %s, wanted %s -> %s\n", inode, cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) ceph_cap_string(cap->issued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) ceph_cap_string(cap->issued & ~drop),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) ceph_cap_string(cap->mds_wanted),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) ceph_cap_string(wanted));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) cap->issued &= ~drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) cap->implemented &= ~drop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) cap->mds_wanted = wanted;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) if (cap == ci->i_auth_cap &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) !(wanted & CEPH_CAP_ANY_FILE_WR))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) ci->i_requested_max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) dout("encode_inode_release %p cap %p %s"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) " (force)\n", inode, cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) ceph_cap_string(cap->issued));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) rel->ino = cpu_to_le64(ceph_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) rel->cap_id = cpu_to_le64(cap->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) rel->seq = cpu_to_le32(cap->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) rel->issue_seq = cpu_to_le32(cap->issue_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) rel->mseq = cpu_to_le32(cap->mseq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) rel->caps = cpu_to_le32(cap->implemented);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) rel->wanted = cpu_to_le32(cap->mds_wanted);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) rel->dname_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) rel->dname_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) *p += sizeof(*rel);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) dout("encode_inode_release %p cap %p %s (noop)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) inode, cap, ceph_cap_string(cap->issued));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) int ceph_encode_dentry_release(void **p, struct dentry *dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) int mds, int drop, int unless)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) struct dentry *parent = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) struct ceph_mds_request_release *rel = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) struct ceph_dentry_info *di = ceph_dentry(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) int force = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) * force a record for the directory caps if we have a dentry lease.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) * this is racy (can't take i_ceph_lock and d_lock together), but it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) * doesn't have to be perfect; the mds will revoke anything we don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) * release.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) if (di->lease_session && di->lease_session->s_mds == mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) force = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) if (!dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) parent = dget(dentry->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) dir = d_inode(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) dput(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560) spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) if (ret && di->lease_session && di->lease_session->s_mds == mds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) dout("encode_dentry_release %p mds%d seq %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) dentry, mds, (int)di->lease_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) rel->dname_len = cpu_to_le32(dentry->d_name.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565) memcpy(*p, dentry->d_name.name, dentry->d_name.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) *p += dentry->d_name.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) rel->dname_seq = cpu_to_le32(di->lease_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) __ceph_mdsc_drop_dentry_lease(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570) spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) }