^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #include <linux/ceph/ceph_debug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include <linux/wait.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <linux/gfp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/debugfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <linux/seq_file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <linux/ratelimit.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/bits.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/ktime.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include "super.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include "mds_client.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <linux/ceph/ceph_features.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #include <linux/ceph/messenger.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #include <linux/ceph/decode.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/ceph/pagelist.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include <linux/ceph/auth.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <linux/ceph/debugfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define RECONNECT_MAX_SIZE (INT_MAX - PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * A cluster of MDS (metadata server) daemons is responsible for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * managing the file system namespace (the directory hierarchy and
 * inodes) and for coordinating shared access to storage.  Metadata is
 * partitioned hierarchically across a number of servers, and that
 * partition varies over time as the cluster adjusts the distribution
 * in order to balance load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) *
 * The MDS client is primarily responsible for managing synchronous
 * metadata requests for operations like open, unlink, and so forth.
 * If there is an MDS failure, we find out about it when we (possibly
 * request and) receive a new MDS map, and can resubmit affected
 * requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * For the most part, though, we take advantage of a lossless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * communications channel to the MDS, and do not need to worry about
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * timing out or resubmitting requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * We maintain a stateful "session" with each MDS we interact with.
 * Within each session, we send periodic heartbeat messages to ensure
 * any capabilities or leases we have been issued remain valid.  If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * the session times out and goes stale, our leases and capabilities
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * are no longer valid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
/*
 * Per-session bookkeeping while encoding a reconnect message.
 * NOTE(review): users of this struct are outside this chunk — field
 * notes below are inferred from names; confirm against the encoders.
 */
struct ceph_reconnect_state {
	struct ceph_mds_session *session;
	int nr_caps, nr_realms;		/* counts of encoded caps / snap realms */
	struct ceph_pagelist *pagelist;	/* accumulated message payload */
	unsigned msg_version;		/* reconnect encoding version in use */
	bool allow_multi;		/* presumably: may span multiple messages — verify */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
/* forward declarations for helpers defined later in this file */
static void __wake_requests(struct ceph_mds_client *mdsc,
			    struct list_head *head);
static void ceph_cap_release_work(struct work_struct *work);
static void ceph_cap_reclaim_work(struct work_struct *work);

static const struct ceph_connection_operations mds_con_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * mds reply parsing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
/*
 * Decode an encoded quota struct from the reply stream into
 * info->max_bytes / info->max_files.
 *
 * Advances *p past the whole encoded struct, including any trailing
 * fields newer than this client understands.  Returns 0 on success,
 * -EIO on a truncated or incompatible encoding.
 */
static int parse_reply_info_quota(void **p, void *end,
				  struct ceph_mds_reply_info_in *info)
{
	u8 struct_v, struct_compat;
	u32 struct_len;

	ceph_decode_8_safe(p, end, struct_v, bad);
	ceph_decode_8_safe(p, end, struct_compat, bad);
	/* struct_v is expected to be >= 1. we only
	 * understand encoding with struct_compat == 1. */
	if (!struct_v || struct_compat != 1)
		goto bad;
	ceph_decode_32_safe(p, end, struct_len, bad);
	ceph_decode_need(p, end, struct_len, bad);
	/* clamp 'end' to this struct so unknown trailing fields are skipped */
	end = *p + struct_len;
	ceph_decode_64_safe(p, end, info->max_bytes, bad);
	ceph_decode_64_safe(p, end, info->max_files, bad);
	*p = end;
	return 0;
bad:
	return -EIO;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
/*
 * parse individual inode info
 *
 * Decodes one ceph_mds_reply_inode plus its variable-length trailers
 * (fragtree splits, symlink target, dir layout, xattr blob) into
 * *info.  The pointers stored in *info reference the message buffer
 * in place; nothing is copied except small fixed-size fields.
 *
 * features == (u64)-1 means the peer uses the versioned encoding: the
 * payload is wrapped in a struct_v/struct_compat/struct_len envelope
 * and the optional fields are always present.  Otherwise each optional
 * field is gated on an individual CEPH_FEATURE_* bit.
 *
 * Returns 0 on success, -EIO on a truncated/incompatible encoding.
 */
static int parse_reply_info_in(void **p, void *end,
			       struct ceph_mds_reply_info_in *info,
			       u64 features)
{
	int err = 0;
	u8 struct_v = 0;

	if (features == (u64)-1) {
		u32 struct_len;
		u8 struct_compat;
		ceph_decode_8_safe(p, end, struct_v, bad);
		ceph_decode_8_safe(p, end, struct_compat, bad);
		/* struct_v is expected to be >= 1. we only understand
		 * encoding with struct_compat == 1. */
		if (!struct_v || struct_compat != 1)
			goto bad;
		ceph_decode_32_safe(p, end, struct_len, bad);
		ceph_decode_need(p, end, struct_len, bad);
		/* bound all further decoding to this struct's payload */
		end = *p + struct_len;
	}

	/* fixed-size inode body plus its trailing fragtree split array */
	ceph_decode_need(p, end, sizeof(struct ceph_mds_reply_inode), bad);
	info->in = *p;
	*p += sizeof(struct ceph_mds_reply_inode) +
		sizeof(*info->in->fragtree.splits) *
		le32_to_cpu(info->in->fragtree.nsplits);

	/* symlink target string (zero-length for non-symlinks) */
	ceph_decode_32_safe(p, end, info->symlink_len, bad);
	ceph_decode_need(p, end, info->symlink_len, bad);
	info->symlink = *p;
	*p += info->symlink_len;

	ceph_decode_copy_safe(p, end, &info->dir_layout,
			      sizeof(info->dir_layout), bad);
	/* xattr blob stays in the message buffer */
	ceph_decode_32_safe(p, end, info->xattr_len, bad);
	ceph_decode_need(p, end, info->xattr_len, bad);
	info->xattr_data = *p;
	*p += info->xattr_len;

	if (features == (u64)-1) {
		/* inline data */
		ceph_decode_64_safe(p, end, info->inline_version, bad);
		ceph_decode_32_safe(p, end, info->inline_len, bad);
		ceph_decode_need(p, end, info->inline_len, bad);
		info->inline_data = *p;
		*p += info->inline_len;
		/* quota */
		err = parse_reply_info_quota(p, end, info);
		if (err < 0)
			goto out_bad;
		/* pool namespace */
		ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
		if (info->pool_ns_len > 0) {
			ceph_decode_need(p, end, info->pool_ns_len, bad);
			info->pool_ns_data = *p;
			*p += info->pool_ns_len;
		}

		/* btime */
		ceph_decode_need(p, end, sizeof(info->btime), bad);
		ceph_decode_copy(p, &info->btime, sizeof(info->btime));

		/* change attribute */
		ceph_decode_64_safe(p, end, info->change_attr, bad);

		/* dir pin */
		if (struct_v >= 2) {
			ceph_decode_32_safe(p, end, info->dir_pin, bad);
		} else {
			info->dir_pin = -ENODATA;
		}

		/* snapshot birth time, remains zero for v<=2 */
		if (struct_v >= 3) {
			ceph_decode_need(p, end, sizeof(info->snap_btime), bad);
			ceph_decode_copy(p, &info->snap_btime,
					 sizeof(info->snap_btime));
		} else {
			memset(&info->snap_btime, 0, sizeof(info->snap_btime));
		}

		/* skip any fields newer than we understand */
		*p = end;
	} else {
		if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
			ceph_decode_64_safe(p, end, info->inline_version, bad);
			ceph_decode_32_safe(p, end, info->inline_len, bad);
			ceph_decode_need(p, end, info->inline_len, bad);
			info->inline_data = *p;
			*p += info->inline_len;
		} else
			info->inline_version = CEPH_INLINE_NONE;

		if (features & CEPH_FEATURE_MDS_QUOTA) {
			err = parse_reply_info_quota(p, end, info);
			if (err < 0)
				goto out_bad;
		} else {
			info->max_bytes = 0;
			info->max_files = 0;
		}

		info->pool_ns_len = 0;
		info->pool_ns_data = NULL;
		if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
			ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
			if (info->pool_ns_len > 0) {
				ceph_decode_need(p, end, info->pool_ns_len, bad);
				info->pool_ns_data = *p;
				*p += info->pool_ns_len;
			}
		}

		if (features & CEPH_FEATURE_FS_BTIME) {
			ceph_decode_need(p, end, sizeof(info->btime), bad);
			ceph_decode_copy(p, &info->btime, sizeof(info->btime));
			ceph_decode_64_safe(p, end, info->change_attr, bad);
		}

		info->dir_pin = -ENODATA;
		/* info->snap_btime remains zero */
	}
	return 0;
bad:
	err = -EIO;
out_bad:
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
/*
 * Decode a ceph_mds_reply_dirfrag (plus its trailing u32 dist array)
 * and point *dirfrag at it in place.
 *
 * features == (u64)-1 selects the versioned encoding, wrapping the
 * dirfrag in a struct_v/struct_compat/struct_len envelope whose
 * unknown trailing fields are skipped.  Returns 0 or -EIO.
 */
static int parse_reply_info_dir(void **p, void *end,
				struct ceph_mds_reply_dirfrag **dirfrag,
				u64 features)
{
	if (features == (u64)-1) {
		u8 struct_v, struct_compat;
		u32 struct_len;
		ceph_decode_8_safe(p, end, struct_v, bad);
		ceph_decode_8_safe(p, end, struct_compat, bad);
		/* struct_v is expected to be >= 1. we only understand
		 * encoding whose struct_compat == 1. */
		if (!struct_v || struct_compat != 1)
			goto bad;
		ceph_decode_32_safe(p, end, struct_len, bad);
		ceph_decode_need(p, end, struct_len, bad);
		end = *p + struct_len;
	}

	ceph_decode_need(p, end, sizeof(**dirfrag), bad);
	*dirfrag = *p;
	*p += sizeof(**dirfrag) + sizeof(u32) * le32_to_cpu((*dirfrag)->ndist);
	/* the ndist array must also fit inside the buffer */
	if (unlikely(*p > end))
		goto bad;
	if (features == (u64)-1)
		*p = end;
	return 0;
bad:
	return -EIO;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255)
/*
 * Decode a ceph_mds_reply_lease and point *lease at it in place.
 *
 * features == (u64)-1 selects the versioned encoding (struct_v/
 * struct_compat/struct_len envelope); unknown trailing fields are
 * then skipped.  Returns 0 or -EIO.
 */
static int parse_reply_info_lease(void **p, void *end,
				  struct ceph_mds_reply_lease **lease,
				  u64 features)
{
	if (features == (u64)-1) {
		u8 struct_v, struct_compat;
		u32 struct_len;
		ceph_decode_8_safe(p, end, struct_v, bad);
		ceph_decode_8_safe(p, end, struct_compat, bad);
		/* struct_v is expected to be >= 1. we only understand
		 * encoding whose struct_compat == 1. */
		if (!struct_v || struct_compat != 1)
			goto bad;
		ceph_decode_32_safe(p, end, struct_len, bad);
		ceph_decode_need(p, end, struct_len, bad);
		end = *p + struct_len;
	}

	ceph_decode_need(p, end, sizeof(**lease), bad);
	*lease = *p;
	*p += sizeof(**lease);
	if (features == (u64)-1)
		*p = end;
	return 0;
bad:
	return -EIO;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
/*
 * parse a normal reply, which may contain a (dir+)dentry and/or a
 * target inode.
 *
 * Layout: when is_dentry — diri, dirfrag, dname, dlease; then, when
 * is_target — targeti.  The trace section must be consumed exactly;
 * leftover bytes mean a corrupt reply (-EIO).
 */
static int parse_reply_info_trace(void **p, void *end,
				  struct ceph_mds_reply_info_parsed *info,
				  u64 features)
{
	int err;

	if (info->head->is_dentry) {
		err = parse_reply_info_in(p, end, &info->diri, features);
		if (err < 0)
			goto out_bad;

		err = parse_reply_info_dir(p, end, &info->dirfrag, features);
		if (err < 0)
			goto out_bad;

		/* dentry name, referenced in place in the message buffer */
		ceph_decode_32_safe(p, end, info->dname_len, bad);
		ceph_decode_need(p, end, info->dname_len, bad);
		info->dname = *p;
		*p += info->dname_len;

		err = parse_reply_info_lease(p, end, &info->dlease, features);
		if (err < 0)
			goto out_bad;
	}

	if (info->head->is_target) {
		err = parse_reply_info_in(p, end, &info->targeti, features);
		if (err < 0)
			goto out_bad;
	}

	/* the whole trace must have been consumed */
	if (unlikely(*p != end))
		goto bad;
	return 0;

bad:
	err = -EIO;
out_bad:
	pr_err("problem parsing mds trace %d\n", err);
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329)
/*
 * parse readdir results
 *
 * Decodes the dirfrag header, a u32 entry count, a u16 of
 * CEPH_READDIR_* flags, then 'num' (dentry name, lease, inode)
 * triples into the preallocated info->dir_entries array.
 *
 * Returns 0 on success, -EIO on a truncated reply or one whose
 * entries would overflow the preallocated buffer.
 */
static int parse_reply_info_readdir(void **p, void *end,
				    struct ceph_mds_reply_info_parsed *info,
				    u64 features)
{
	u32 num, i = 0;
	int err;

	err = parse_reply_info_dir(p, end, &info->dir_dir, features);
	if (err < 0)
		goto out_bad;

	/* u32 count followed by a u16 flags word (hence the "+ 2") */
	ceph_decode_need(p, end, sizeof(num) + 2, bad);
	num = ceph_decode_32(p);
	{
		u16 flags = ceph_decode_16(p);
		info->dir_end = !!(flags & CEPH_READDIR_FRAG_END);
		info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE);
		info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER);
		info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH);
	}
	if (num == 0)
		goto done;

	/* caller must have sized dir_entries/dir_buf_size beforehand */
	BUG_ON(!info->dir_entries);
	if ((unsigned long)(info->dir_entries + num) >
	    (unsigned long)info->dir_entries + info->dir_buf_size) {
		pr_err("dir contents are larger than expected\n");
		WARN_ON(1);
		goto bad;
	}

	info->dir_nr = num;
	while (num) {
		struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
		/* dentry */
		ceph_decode_32_safe(p, end, rde->name_len, bad);
		ceph_decode_need(p, end, rde->name_len, bad);
		rde->name = *p;
		*p += rde->name_len;
		dout("parsed dir dname '%.*s'\n", rde->name_len, rde->name);

		/* dentry lease */
		err = parse_reply_info_lease(p, end, &rde->lease, features);
		if (err)
			goto out_bad;
		/* inode */
		err = parse_reply_info_in(p, end, &rde->inode, features);
		if (err < 0)
			goto out_bad;
		/* ceph_readdir_prepopulate() will update it */
		rde->offset = 0;
		i++;
		num--;
	}

done:
	/* Skip over any unrecognized fields */
	*p = end;
	return 0;

bad:
	err = -EIO;
out_bad:
	pr_err("problem parsing dir contents %d\n", err);
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) * parse fcntl F_GETLK results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) static int parse_reply_info_filelock(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) u64 features)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) if (*p + sizeof(*info->filelock_reply) > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) info->filelock_reply = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) /* Skip over any unrecognized fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) *p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) #if BITS_PER_LONG == 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) #define DELEGATED_INO_AVAILABLE xa_mk_value(1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) static int ceph_parse_deleg_inos(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) u32 sets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) ceph_decode_32_safe(p, end, sets, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) dout("got %u sets of delegated inodes\n", sets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) while (sets--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) u64 start, len, ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) ceph_decode_64_safe(p, end, start, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) ceph_decode_64_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) /* Don't accept a delegation of system inodes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) if (start < CEPH_INO_SYSTEM_BASE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) start, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) int err = xa_insert(&s->s_delegated_inos, ino = start++,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) DELEGATED_INO_AVAILABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) if (!err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) dout("added delegated inode 0x%llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) start - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) } else if (err == -EBUSY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) pr_warn("ceph: MDS delegated inode 0x%llx more than once.\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) start - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) u64 ceph_get_deleg_ino(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) unsigned long ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) void *val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) xa_for_each(&s->s_delegated_inos, ino, val) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) val = xa_erase(&s->s_delegated_inos, ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) if (val == DELEGATED_INO_AVAILABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) return ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) int ceph_restore_deleg_ino(struct ceph_mds_session *s, u64 ino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) return xa_insert(&s->s_delegated_inos, ino, DELEGATED_INO_AVAILABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) #else /* BITS_PER_LONG == 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) * FIXME: xarrays can't handle 64-bit indexes on a 32-bit arch. For now, just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) * ignore delegated_inos on 32 bit arch. Maybe eventually add xarrays for top
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) * and bottom words?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) static int ceph_parse_deleg_inos(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) u32 sets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) ceph_decode_32_safe(p, end, sets, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) if (sets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) ceph_decode_skip_n(p, end, sets * 2 * sizeof(__le64), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) u64 ceph_get_deleg_ino(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) int ceph_restore_deleg_ino(struct ceph_mds_session *s, u64 ino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) #endif /* BITS_PER_LONG == 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) * parse create results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) static int parse_reply_info_create(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) u64 features, struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) if (features == (u64)-1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) (features & CEPH_FEATURE_REPLY_CREATE_INODE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) if (*p == end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) /* Malformed reply? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) info->has_create_ino = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) } else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) u8 struct_v, struct_compat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) info->has_create_ino = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) ceph_decode_8_safe(p, end, struct_v, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) ceph_decode_8_safe(p, end, struct_compat, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) ceph_decode_32_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) ceph_decode_64_safe(p, end, info->ino, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) ret = ceph_parse_deleg_inos(p, end, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) /* legacy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) ceph_decode_64_safe(p, end, info->ino, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) info->has_create_ino = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) if (*p != end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) /* Skip over any unrecognized fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) *p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) * parse extra results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) static int parse_reply_info_extra(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) u64 features, struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) u32 op = le32_to_cpu(info->head->op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) if (op == CEPH_MDS_OP_GETFILELOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) return parse_reply_info_filelock(p, end, info, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) return parse_reply_info_readdir(p, end, info, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) else if (op == CEPH_MDS_OP_CREATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) return parse_reply_info_create(p, end, info, features, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) * parse entire mds reply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) u64 features)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) info->head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) p = msg->front.iov_base + sizeof(struct ceph_mds_reply_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) end = p + msg->front.iov_len - sizeof(struct ceph_mds_reply_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) /* trace */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) ceph_decode_32_safe(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) ceph_decode_need(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) err = parse_reply_info_trace(&p, p+len, info, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) goto out_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) /* extra */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) ceph_decode_32_safe(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) ceph_decode_need(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) err = parse_reply_info_extra(&p, p+len, info, features, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) goto out_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) /* snap blob */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) ceph_decode_32_safe(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) info->snapblob_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) info->snapblob = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) if (p != end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) out_bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) pr_err("mds parse_reply err %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) if (!info->dir_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) * sessions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) const char *ceph_session_state_name(int s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) switch (s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) case CEPH_MDS_SESSION_NEW: return "new";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) case CEPH_MDS_SESSION_OPENING: return "opening";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) case CEPH_MDS_SESSION_OPEN: return "open";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) case CEPH_MDS_SESSION_HUNG: return "hung";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) case CEPH_MDS_SESSION_CLOSING: return "closing";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) case CEPH_MDS_SESSION_CLOSED: return "closed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) case CEPH_MDS_SESSION_RESTARTING: return "restarting";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) case CEPH_MDS_SESSION_REJECTED: return "rejected";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) default: return "???";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) struct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) if (refcount_inc_not_zero(&s->s_ref)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) dout("mdsc get_session %p %d -> %d\n", s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) dout("mdsc get_session %p 0 -- FAIL\n", s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) void ceph_put_mds_session(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) if (IS_ERR_OR_NULL(s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) dout("mdsc put_session %p %d -> %d\n", s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) if (refcount_dec_and_test(&s->s_ref)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) if (s->s_auth.authorizer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) ceph_auth_destroy_authorizer(s->s_auth.authorizer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) WARN_ON(mutex_is_locked(&s->s_mutex));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) xa_destroy(&s->s_delegated_inos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) int mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) return ceph_get_mds_session(mdsc->sessions[mds]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) static bool __have_session(struct ceph_mds_client *mdsc, int mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) static int __verify_registered_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) if (s->s_mds >= mdsc->max_sessions ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) mdsc->sessions[s->s_mds] != s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) * create+register a new session for given mds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) * called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) int mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) struct ceph_mds_session *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) if (mds >= mdsc->mdsmap->possible_max_rank)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) s = kzalloc(sizeof(*s), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) if (mds >= mdsc->max_sessions) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) int newmax = 1 << get_count_order(mds + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) struct ceph_mds_session **sa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) dout("%s: realloc to %d\n", __func__, newmax);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) if (!sa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) goto fail_realloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) if (mdsc->sessions) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) memcpy(sa, mdsc->sessions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) mdsc->max_sessions * sizeof(void *));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) kfree(mdsc->sessions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) mdsc->sessions = sa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) mdsc->max_sessions = newmax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) dout("%s: mds%d\n", __func__, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) s->s_mdsc = mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) s->s_mds = mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) s->s_state = CEPH_MDS_SESSION_NEW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) s->s_ttl = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) s->s_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) mutex_init(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) spin_lock_init(&s->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) s->s_cap_gen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) s->s_cap_ttl = jiffies - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) spin_lock_init(&s->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) s->s_renew_requested = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) s->s_renew_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) INIT_LIST_HEAD(&s->s_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) s->s_nr_caps = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) refcount_set(&s->s_ref, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) INIT_LIST_HEAD(&s->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) INIT_LIST_HEAD(&s->s_unsafe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) xa_init(&s->s_delegated_inos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) s->s_num_cap_releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) s->s_cap_reconnect = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) s->s_cap_iterator = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) INIT_LIST_HEAD(&s->s_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) INIT_WORK(&s->s_cap_release_work, ceph_cap_release_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) INIT_LIST_HEAD(&s->s_cap_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) INIT_LIST_HEAD(&s->s_cap_flushing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) mdsc->sessions[mds] = s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) atomic_inc(&mdsc->num_sessions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) refcount_inc(&s->s_ref); /* one ref to sessions[], one to caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) fail_realloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) static void __unregister_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) dout("__unregister_session mds%d %p\n", s->s_mds, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) BUG_ON(mdsc->sessions[s->s_mds] != s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) mdsc->sessions[s->s_mds] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) ceph_con_close(&s->s_con);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) atomic_dec(&mdsc->num_sessions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) * drop session refs in request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) * should be last request ref, or hold mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) static void put_request_session(struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) if (req->r_session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) ceph_put_mds_session(req->r_session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) req->r_session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) void ceph_mdsc_release_request(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) struct ceph_mds_request *req = container_of(kref,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) struct ceph_mds_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) r_kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) ceph_mdsc_release_dir_caps_no_check(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) destroy_reply_info(&req->r_reply_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) if (req->r_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) ceph_msg_put(req->r_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) if (req->r_reply)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) ceph_msg_put(req->r_reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) if (req->r_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) /* avoid calling iput_final() in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) ceph_async_iput(req->r_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) if (req->r_parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) ceph_async_iput(req->r_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) ceph_async_iput(req->r_target_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) if (req->r_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) dput(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) if (req->r_old_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) dput(req->r_old_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) if (req->r_old_dentry_dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) * track (and drop pins for) r_old_dentry_dir
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) * separately, since r_old_dentry's d_parent may have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) * changed between the dir mutex being dropped and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) * this request being freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) ceph_async_iput(req->r_old_dentry_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) kfree(req->r_path1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) kfree(req->r_path2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) if (req->r_pagelist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) ceph_pagelist_release(req->r_pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) put_request_session(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) WARN_ON_ONCE(!list_empty(&req->r_wait));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) kmem_cache_free(ceph_mds_request_cachep, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) DEFINE_RB_FUNCS(request, struct ceph_mds_request, r_tid, r_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) * lookup session, bump ref if found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) * called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) static struct ceph_mds_request *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) lookup_get_request(struct ceph_mds_client *mdsc, u64 tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) req = lookup_request(&mdsc->request_tree, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) if (req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) ceph_mdsc_get_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) * Register an in-flight request, and assign a tid. Link to directory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) * are modifying (if any).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) * Called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) static void __register_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) struct inode *dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) req->r_tid = ++mdsc->last_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) if (req->r_num_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) req->r_num_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) pr_err("__register_request %p "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) "failed to reserve caps: %d\n", req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) /* set req->r_err to fail early from __do_request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) req->r_err = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) dout("__register_request %p tid %lld\n", req, req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) ceph_mdsc_get_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) insert_request(&mdsc->request_tree, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) req->r_uid = current_fsuid();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) req->r_gid = current_fsgid();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) mdsc->oldest_tid = req->r_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) if (dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) struct ceph_inode_info *ci = ceph_inode(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) ihold(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) req->r_unsafe_dir = dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) static void __unregister_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) dout("__unregister_request %p tid %lld\n", req, req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) /* Never leave an unregistered request on an unsafe list! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) list_del_init(&req->r_unsafe_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) if (req->r_tid == mdsc->oldest_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) struct rb_node *p = rb_next(&req->r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) mdsc->oldest_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) struct ceph_mds_request *next_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) rb_entry(p, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) if (next_req->r_op != CEPH_MDS_OP_SETFILELOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) mdsc->oldest_tid = next_req->r_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) erase_request(&mdsc->request_tree, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) if (req->r_unsafe_dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) list_del_init(&req->r_unsafe_dir_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) if (req->r_target_inode &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) list_del_init(&req->r_unsafe_target_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) if (req->r_unsafe_dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) /* avoid calling iput_final() in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) ceph_async_iput(req->r_unsafe_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) req->r_unsafe_dir = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) complete_all(&req->r_safe_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) * Walk back up the dentry tree until we hit a dentry representing a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) * non-snapshot inode. We do this using the rcu_read_lock (which must be held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * when calling this) to ensure that the objects won't disappear while we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * working with them. Once we hit a candidate dentry, we attempt to take a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) * reference to it, and return that as the result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) static struct inode *get_nonsnap_parent(struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) struct inode *inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) while (dentry && !IS_ROOT(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) inode = d_inode_rcu(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) dentry = dentry->d_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) if (inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) inode = igrab(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) return inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) * Choose mds to send request to next. If there is a hint set in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * request (e.g., due to a prior forward hint from the mds), use that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) * Otherwise, consult frag tree and/or caps to identify the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) * appropriate mds. If all else fails, choose randomly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) * Called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) static int __choose_mds(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) bool *random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) int mode = req->r_direct_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) int mds = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) u32 hash = req->r_direct_hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) if (random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) *random = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * is there a specific mds we should try? ignore hint if we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) * no session and the mds is not up (active or recovering).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) if (req->r_resend_mds >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) (__have_session(mdsc, req->r_resend_mds) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) dout("%s using resend_mds mds%d\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) req->r_resend_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) return req->r_resend_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) if (mode == USE_RANDOM_MDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) goto random;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) if (req->r_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) inode = req->r_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) /* req->r_dentry is non-null for LSSNAP request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) inode = get_nonsnap_parent(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) dout("%s using snapdir's parent %p\n", __func__, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) } else if (req->r_dentry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) /* ignore race with rename; old or new d_parent is okay */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) struct dentry *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) struct inode *dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) parent = READ_ONCE(req->r_dentry->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) dir = req->r_parent ? : d_inode_rcu(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) if (!dir || dir->i_sb != mdsc->fsc->sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) /* not this fs or parent went negative */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) inode = d_inode(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) if (inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) } else if (ceph_snap(dir) != CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) /* direct snapped/virtual snapdir requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) * based on parent dir inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) inode = get_nonsnap_parent(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) dout("%s using nonsnap parent %p\n", __func__, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) /* dentry target */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) inode = d_inode(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) if (!inode || mode == USE_AUTH_MDS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) /* dir + name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) inode = igrab(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) hash = ceph_dentry_hash(dir, req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) is_hash = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) hash, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (!inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) goto random;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) if (is_hash && S_ISDIR(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) struct ceph_inode_frag frag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) int found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) ceph_choose_frag(ci, hash, &frag, &found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) if (found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) if (mode == USE_ANY_MDS && frag.ndist > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) u8 r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) /* choose a random replica */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) get_random_bytes(&r, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) r %= frag.ndist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) mds = frag.dist[r];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) __func__, inode, ceph_vinop(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) frag.frag, mds, (int)r, frag.ndist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) CEPH_MDS_STATE_ACTIVE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) /* since this file/dir wasn't known to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * replicated, then we want to look for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * authoritative mds. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) if (frag.mds >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) /* choose auth mds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) mds = frag.mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) dout("%s %p %llx.%llx frag %u mds%d (auth)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) __func__, inode, ceph_vinop(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) frag.frag, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) CEPH_MDS_STATE_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) if (!ceph_mdsmap_is_laggy(mdsc->mdsmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) mds))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) mode = USE_AUTH_MDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) cap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) if (mode == USE_AUTH_MDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) if (!cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) goto random;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) mds = cap->session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) inode, ceph_vinop(inode), mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) cap == ci->i_auth_cap ? "auth " : "", cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) /* avoid calling iput_final() while holding mdsc->mutex or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) * in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) return mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) random:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) if (random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) *random = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) dout("%s chose random mds%d\n", __func__, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) return mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) * session messages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) static struct ceph_msg *create_session_msg(u32 op, u64 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) struct ceph_mds_session_head *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) pr_err("create_session_msg ENOMEM creating msg\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) h = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) h->op = cpu_to_le32(op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) h->seq = cpu_to_le64(seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) return msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) static const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) #define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) static int encode_supported_features(void **p, void *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) static const size_t count = ARRAY_SIZE(feature_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) if (count > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) size_t size = FEATURE_BYTES(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) if (WARN_ON_ONCE(*p + 4 + size > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) ceph_encode_32(p, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) memset(*p, 0, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) for (i = 0; i < count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) ((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) *p += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) if (WARN_ON_ONCE(*p + 4 > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) ceph_encode_32(p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) #define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) static int encode_metric_spec(void **p, void *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) static const size_t count = ARRAY_SIZE(metric_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) /* header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) if (WARN_ON_ONCE(*p + 2 > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) ceph_encode_8(p, 1); /* version */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) ceph_encode_8(p, 1); /* compat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) if (count > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) size_t size = METRIC_BYTES(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) if (WARN_ON_ONCE(*p + 4 + 4 + size > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) /* metric spec info length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) ceph_encode_32(p, 4 + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) /* metric spec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) ceph_encode_32(p, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) memset(*p, 0, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) for (i = 0; i < count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) ((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) *p += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) if (WARN_ON_ONCE(*p + 4 + 4 > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) /* metric spec info length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) ceph_encode_32(p, 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) /* metric spec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) ceph_encode_32(p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) * session message, specialization for CEPH_SESSION_REQUEST_OPEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) * to include additional client metadata fields.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) struct ceph_mds_session_head *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) int i = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) int extra_bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) int metadata_key_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) struct ceph_options *opt = mdsc->fsc->client->options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) struct ceph_mount_options *fsopt = mdsc->fsc->mount_options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) size_t size, count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) const char* metadata[][2] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) {"hostname", mdsc->nodename},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) {"kernel_version", init_utsname()->release},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) {"entity_id", opt->name ? : ""},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) {"root", fsopt->server_path ? : "/"},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) {NULL, NULL}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) /* Calculate serialized length of metadata */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) extra_bytes = 4; /* map length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) for (i = 0; metadata[i][0]; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) extra_bytes += 8 + strlen(metadata[i][0]) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) strlen(metadata[i][1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) metadata_key_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) /* supported feature */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) count = ARRAY_SIZE(feature_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) if (count > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) size = FEATURE_BYTES(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) extra_bytes += 4 + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) /* metric spec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) count = ARRAY_SIZE(metric_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) if (count > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) size = METRIC_BYTES(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) extra_bytes += 2 + 4 + 4 + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) /* Allocate the message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) pr_err("create_session_msg ENOMEM creating msg\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) h = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) h->seq = cpu_to_le64(seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) * Serialize client metadata into waiting buffer space, using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) * the format that userspace expects for map<string, string>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) * ClientSession messages with metadata are v4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) msg->hdr.version = cpu_to_le16(4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) msg->hdr.compat_version = cpu_to_le16(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /* The write pointer, following the session_head structure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) p += sizeof(*h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /* Number of entries in the map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) ceph_encode_32(&p, metadata_key_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) /* Two length-prefixed strings for each entry in the map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) for (i = 0; metadata[i][0]; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) size_t const key_len = strlen(metadata[i][0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) size_t const val_len = strlen(metadata[i][1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) ceph_encode_32(&p, key_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) memcpy(p, metadata[i][0], key_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) p += key_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) ceph_encode_32(&p, val_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) memcpy(p, metadata[i][1], val_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) p += val_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) ret = encode_supported_features(&p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) pr_err("encode_supported_features failed!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) ceph_msg_put(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) ret = encode_metric_spec(&p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) pr_err("encode_metric_spec failed!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) ceph_msg_put(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) msg->front.iov_len = p - msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) return msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)
/*
 * send session open request.
 *
 * called under mdsc->mutex
 *
 * Marks the session OPENING and sends a CEPH_SESSION_REQUEST_OPEN
 * message; returns 0 on success or a negative errno if the message
 * could not be built.
 */
static int __open_session(struct ceph_mds_client *mdsc,
			  struct ceph_mds_session *session)
{
	struct ceph_msg *msg;
	int mstate;
	int mds = session->s_mds;

	/* wait for mds to go active? */
	mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
	dout("open_session to mds%d (%s)\n", mds,
	     ceph_mds_state_name(mstate));
	session->s_state = CEPH_MDS_SESSION_OPENING;
	/* timestamp also feeds the staleness check in send_renew_caps() */
	session->s_renew_requested = jiffies;

	/* send connect message */
	msg = create_session_open_msg(mdsc, session->s_seq);
	if (IS_ERR(msg))
		return PTR_ERR(msg);
	ceph_con_send(&session->s_con, msg);
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) * open sessions for any export targets for the given mds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) static struct ceph_mds_session *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) __open_export_target_session(struct ceph_mds_client *mdsc, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) struct ceph_mds_session *session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) session = __ceph_lookup_mds_session(mdsc, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) if (!session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) session = register_session(mdsc, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) if (IS_ERR(session))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) return session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) if (session->s_state == CEPH_MDS_SESSION_NEW ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) session->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) ret = __open_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) return session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
/*
 * Open a session to @target, taking mdsc->mutex around the work.
 * On success the caller owns a reference and must drop it with
 * ceph_put_mds_session() (see __open_export_target_sessions()).
 */
struct ceph_mds_session *
ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target)
{
	struct ceph_mds_session *session;

	dout("open_export_target_session to mds%d\n", target);

	mutex_lock(&mdsc->mutex);
	session = __open_export_target_session(mdsc, target);
	mutex_unlock(&mdsc->mutex);

	return session;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424)
/*
 * Open sessions to all export targets recorded in the mdsmap for
 * @session's mds.  Called under mdsc->mutex.
 */
static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
					  struct ceph_mds_session *session)
{
	struct ceph_mds_info *mi;
	struct ceph_mds_session *ts;
	int i, mds = session->s_mds;

	/* rank not covered by the current mdsmap: nothing to do */
	if (mds >= mdsc->mdsmap->possible_max_rank)
		return;

	mi = &mdsc->mdsmap->m_info[mds];
	dout("open_export_target_sessions for mds%d (%d targets)\n",
	     session->s_mds, mi->num_export_targets);

	for (i = 0; i < mi->num_export_targets; i++) {
		ts = __open_export_target_session(mdsc, mi->export_targets[i]);
		/* NOTE(review): ts may be an ERR_PTR here; assumes
		 * ceph_put_mds_session() tolerates NULL/ERR_PTR — verify. */
		ceph_put_mds_session(ts);
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444)
/* Like __open_export_target_sessions(), but takes mdsc->mutex itself. */
void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
					   struct ceph_mds_session *session)
{
	mutex_lock(&mdsc->mutex);
	__open_export_target_sessions(mdsc, session);
	mutex_unlock(&mdsc->mutex);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) * session caps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) static void detach_cap_releases(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) struct list_head *target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) lockdep_assert_held(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) list_splice_init(&session->s_cap_releases, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) session->s_num_cap_releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) dout("dispose_cap_releases mds%d\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) static void dispose_cap_releases(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) struct list_head *dispose)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) while (!list_empty(dispose)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) /* zero out the in-progress message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) cap = list_first_entry(dispose, struct ceph_cap, session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) list_del(&cap->session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) ceph_put_cap(mdsc, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478)
/*
 * Drop all unsafe (not yet committed) requests queued on @session, and
 * clear r_attempts on the session's remaining requests so they will be
 * re-sent by kick_requests().
 */
static void cleanup_session_requests(struct ceph_mds_client *mdsc,
				     struct ceph_mds_session *session)
{
	struct ceph_mds_request *req;
	struct rb_node *p;

	dout("cleanup_session_requests mds%d\n", session->s_mds);
	mutex_lock(&mdsc->mutex);
	while (!list_empty(&session->s_unsafe)) {
		req = list_first_entry(&session->s_unsafe,
				       struct ceph_mds_request, r_unsafe_item);
		pr_warn_ratelimited(" dropping unsafe request %llu\n",
				    req->r_tid);
		/* flag the affected mappings so fsync/close see an error */
		if (req->r_target_inode)
			mapping_set_error(req->r_target_inode->i_mapping, -EIO);
		if (req->r_unsafe_dir)
			mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO);
		__unregister_request(mdsc, req);
	}
	/* zero r_attempts, so kick_requests() will re-send requests */
	p = rb_first(&mdsc->request_tree);
	while (p) {
		req = rb_entry(p, struct ceph_mds_request, r_node);
		p = rb_next(p);
		if (req->r_session &&
		    req->r_session->s_mds == session->s_mds)
			req->r_attempts = 0;
	}
	mutex_unlock(&mdsc->mutex);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509)
/*
 * Helper to safely iterate over all caps associated with a session, with
 * special care taken to handle a racing __ceph_remove_cap().
 *
 * Caller must hold session s_mutex.
 *
 * @cb is called with an inode reference held and s_cap_lock dropped; a
 * negative return from @cb aborts the walk and is returned to the
 * caller.  iput and put_cap of the previous iteration's inode/cap are
 * deferred until all locks are dropped.
 */
int ceph_iterate_session_caps(struct ceph_mds_session *session,
			      int (*cb)(struct inode *, struct ceph_cap *,
					void *), void *arg)
{
	struct list_head *p;
	struct ceph_cap *cap;
	struct inode *inode, *last_inode = NULL;
	struct ceph_cap *old_cap = NULL;
	int ret;

	dout("iterate_session_caps %p mds%d\n", session, session->s_mds);
	spin_lock(&session->s_cap_lock);
	p = session->s_caps.next;
	while (p != &session->s_caps) {
		cap = list_entry(p, struct ceph_cap, session_caps);
		/* igrab fails for inodes being freed; skip those caps */
		inode = igrab(&cap->ci->vfs_inode);
		if (!inode) {
			p = p->next;
			continue;
		}
		/* pin our position: a racing removal leaves this cap on
		 * the list for us to finish below (see !cap->ci check) */
		session->s_cap_iterator = cap;
		spin_unlock(&session->s_cap_lock);

		if (last_inode) {
			/* avoid calling iput_final() while holding
			 * s_mutex or in mds dispatch threads */
			ceph_async_iput(last_inode);
			last_inode = NULL;
		}
		if (old_cap) {
			/* cap deferred from the previous pass */
			ceph_put_cap(session->s_mdsc, old_cap);
			old_cap = NULL;
		}

		ret = cb(inode, cap, arg);
		last_inode = inode;

		spin_lock(&session->s_cap_lock);
		p = p->next;
		if (!cap->ci) {
			/* cap was removed while the lock was dropped;
			 * complete the removal that was deferred to us */
			dout("iterate_session_caps finishing cap %p removal\n",
			     cap);
			BUG_ON(cap->session != session);
			cap->session = NULL;
			list_del_init(&cap->session_caps);
			session->s_nr_caps--;
			atomic64_dec(&session->s_mdsc->metric.total_caps);
			if (cap->queue_release)
				__ceph_queue_cap_release(session, cap);
			else
				old_cap = cap; /* put_cap it w/o locks held */
		}
		if (ret < 0)
			goto out;
	}
	ret = 0;
out:
	session->s_cap_iterator = NULL;
	spin_unlock(&session->s_cap_lock);

	/* flush the final deferred iput/put_cap, locks now dropped */
	ceph_async_iput(last_inode);
	if (old_cap)
		ceph_put_cap(session->s_mdsc, old_cap);

	return ret;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) struct ceph_cap_snap *capsnap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) int capsnap_release = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) while (!list_empty(&ci->i_cap_snaps)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) capsnap = list_first_entry(&ci->i_cap_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) struct ceph_cap_snap, ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) __ceph_remove_capsnap(inode, capsnap, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) ceph_put_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) ceph_put_cap_snap(capsnap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) capsnap_release++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) wake_up_all(&mdsc->cap_flushing_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) return capsnap_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605)
/*
 * Callback for ceph_iterate_session_caps(): remove @cap from @inode and,
 * when it was the auth (last meaningful) cap, discard any dirty/flushing
 * cap state and capsnaps so the inode can be released.  Always returns 0
 * so the iteration continues.
 */
static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
				  void *arg)
{
	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
	struct ceph_mds_client *mdsc = fsc->mdsc;
	struct ceph_inode_info *ci = ceph_inode(inode);
	LIST_HEAD(to_remove);	/* cap flushes to free outside i_ceph_lock */
	bool dirty_dropped = false;
	bool invalidate = false;
	int capsnap_release = 0;

	dout("removing cap %p, ci is %p, inode is %p\n",
	     cap, ci, &ci->vfs_inode);
	spin_lock(&ci->i_ceph_lock);
	__ceph_remove_cap(cap, false);
	if (!ci->i_auth_cap) {
		struct ceph_cap_flush *cf;

		/* forced unmount: cached data can no longer be written back */
		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
			if (inode->i_data.nrpages > 0)
				invalidate = true;
			if (ci->i_wrbuffer_ref > 0)
				mapping_set_error(&inode->i_data, -EIO);
		}

		/* collect pending cap flushes; freed after unlocking */
		while (!list_empty(&ci->i_cap_flush_list)) {
			cf = list_first_entry(&ci->i_cap_flush_list,
					      struct ceph_cap_flush, i_list);
			list_move(&cf->i_list, &to_remove);
		}

		spin_lock(&mdsc->cap_dirty_lock);

		/* also detach them from the global flushing list */
		list_for_each_entry(cf, &to_remove, i_list)
			list_del_init(&cf->g_list);

		if (!list_empty(&ci->i_dirty_item)) {
			pr_warn_ratelimited(
				" dropping dirty %s state for %p %lld\n",
				ceph_cap_string(ci->i_dirty_caps),
				inode, ceph_ino(inode));
			ci->i_dirty_caps = 0;
			list_del_init(&ci->i_dirty_item);
			dirty_dropped = true;
		}
		if (!list_empty(&ci->i_flushing_item)) {
			pr_warn_ratelimited(
				" dropping dirty+flushing %s state for %p %lld\n",
				ceph_cap_string(ci->i_flushing_caps),
				inode, ceph_ino(inode));
			ci->i_flushing_caps = 0;
			list_del_init(&ci->i_flushing_item);
			mdsc->num_cap_flushing--;
			dirty_dropped = true;
		}
		spin_unlock(&mdsc->cap_dirty_lock);

		if (dirty_dropped) {
			mapping_set_error(inode->i_mapping, -EIO);

			if (ci->i_wrbuffer_ref_head == 0 &&
			    ci->i_wr_ref == 0 &&
			    ci->i_dirty_caps == 0 &&
			    ci->i_flushing_caps == 0) {
				ceph_put_snap_context(ci->i_head_snapc);
				ci->i_head_snapc = NULL;
			}
		}

		if (atomic_read(&ci->i_filelock_ref) > 0) {
			/* make further file lock syscall return -EIO */
			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
					    inode, ceph_ino(inode));
		}

		/* no dirty caps left, so the preallocated flush is unused */
		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
			ci->i_prealloc_cap_flush = NULL;
		}

		if (!list_empty(&ci->i_cap_snaps))
			capsnap_release = remove_capsnaps(mdsc, inode);
	}
	spin_unlock(&ci->i_ceph_lock);
	/* free the detached cap flushes now that i_ceph_lock is dropped */
	while (!list_empty(&to_remove)) {
		struct ceph_cap_flush *cf;
		cf = list_first_entry(&to_remove,
				      struct ceph_cap_flush, i_list);
		list_del_init(&cf->i_list);
		/* capsnap-embedded flushes are freed with the capsnap */
		if (!cf->is_capsnap)
			ceph_free_cap_flush(cf);
	}

	wake_up_all(&ci->i_cap_wq);
	if (invalidate)
		ceph_queue_invalidate(inode);
	/* NOTE(review): the iputs below appear to balance inode refs held
	 * by dirty-cap and capsnap state — verify against cap code. */
	if (dirty_dropped)
		iput(inode);
	while (capsnap_release--)
		iput(inode);
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709)
/*
 * Remove every cap attached to @session (via remove_session_caps_cb),
 * waiting out racing inode deletions, then dispose of any queued cap
 * releases.
 *
 * caller must hold session s_mutex
 */
static void remove_session_caps(struct ceph_mds_session *session)
{
	struct ceph_fs_client *fsc = session->s_mdsc->fsc;
	struct super_block *sb = fsc->sb;
	LIST_HEAD(dispose);

	dout("remove_session_caps on %p\n", session);
	ceph_iterate_session_caps(session, remove_session_caps_cb, fsc);

	wake_up_all(&fsc->mdsc->cap_flushing_wq);

	spin_lock(&session->s_cap_lock);
	if (session->s_nr_caps > 0) {
		struct inode *inode;
		struct ceph_cap *cap, *prev = NULL;
		struct ceph_vino vino;
		/*
		 * iterate_session_caps() skips inodes that are being
		 * deleted, we need to wait until deletions are complete.
		 * __wait_on_freeing_inode() is designed for the job,
		 * but it is not exported, so use lookup inode function
		 * to access it.
		 */
		while (!list_empty(&session->s_caps)) {
			cap = list_entry(session->s_caps.next,
					 struct ceph_cap, session_caps);
			/* same cap still at the head: no progress, stop */
			if (cap == prev)
				break;
			prev = cap;
			vino = cap->ci->i_vino;
			spin_unlock(&session->s_cap_lock);

			inode = ceph_find_inode(sb, vino);
			/* avoid calling iput_final() while holding s_mutex */
			ceph_async_iput(inode);

			spin_lock(&session->s_cap_lock);
		}
	}

	/* detach queued cap releases; disposed of below, after unlocking */
	detach_cap_releases(session, &dispose);

	BUG_ON(session->s_nr_caps > 0);
	BUG_ON(!list_empty(&session->s_cap_flushing));
	spin_unlock(&session->s_cap_lock);
	dispose_cap_releases(session->s_mdsc, &dispose);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761)
/* events passed to wake_up_session_caps() / wake_up_session_cb() */
enum {
	RECONNECT,	/* reset per-inode wanted/requested max-size state */
	RENEWCAPS,	/* downgrade caps the mds did not re-issue */
	FORCE_RO,	/* no per-cap work; just wake cap waiters */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) * wake up any threads waiting on this session's caps. if the cap is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) * old (didn't get renewed on the client reconnect), remove it now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) * caller must hold s_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) unsigned long ev = (unsigned long)arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) if (ev == RECONNECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) ci->i_wanted_max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) ci->i_requested_max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) } else if (ev == RENEWCAPS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) if (cap->cap_gen < cap->session->s_cap_gen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) /* mds did not re-issue stale cap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) cap->issued = cap->implemented = CEPH_CAP_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) } else if (ev == FORCE_RO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797)
/* Run wake_up_session_cb(@ev) over every cap on @session. */
static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
{
	dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
	ceph_iterate_session_caps(session, wake_up_session_cb,
				  (void *)(unsigned long)ev);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804)
/*
 * Send periodic message to MDS renewing all currently held caps. The
 * ack will reset the expiration for all caps from this session.
 *
 * caller holds s_mutex
 *
 * Returns 0 (also when the renew is skipped because the mds is still
 * recovering) or -ENOMEM.
 */
static int send_renew_caps(struct ceph_mds_client *mdsc,
			   struct ceph_mds_session *session)
{
	struct ceph_msg *msg;
	int state;

	/* warn when caps expired before the previous renew was acked;
	 * fires only once per stale period, since s_renew_requested is
	 * refreshed just below */
	if (time_after_eq(jiffies, session->s_cap_ttl) &&
	    time_after_eq(session->s_cap_ttl, session->s_renew_requested))
		pr_info("mds%d caps stale\n", session->s_mds);
	session->s_renew_requested = jiffies;

	/* do not try to renew caps until a recovering mds has reconnected
	 * with its clients. */
	state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds);
	if (state < CEPH_MDS_STATE_RECONNECT) {
		dout("send_renew_caps ignoring mds%d (%s)\n",
		     session->s_mds, ceph_mds_state_name(state));
		return 0;
	}

	dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
	     ceph_mds_state_name(state));
	msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
				 ++session->s_renew_seq);
	if (!msg)
		return -ENOMEM;
	ceph_con_send(&session->s_con, msg);
	return 0;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) struct ceph_mds_session *session, u64 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) session->s_mds, ceph_session_state_name(session->s_state), seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) * Note new cap ttl, and any transition from stale -> not stale (fresh?).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) * Called under session->s_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) static void renewed_caps(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) struct ceph_mds_session *session, int is_renew)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) int was_stale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) int wake = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) session->s_cap_ttl = session->s_renew_requested +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) mdsc->mdsmap->m_session_timeout*HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) if (was_stale) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) if (time_before(jiffies, session->s_cap_ttl)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) pr_info("mds%d caps renewed\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) wake = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) pr_info("mds%d caps still stale\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) if (wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) wake_up_session_caps(session, RENEWCAPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) * send a session close request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) static int request_close_session(struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) dout("request_close_session mds%d state %s seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) session->s_mds, ceph_session_state_name(session->s_state),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) session->s_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) * Called with s_mutex held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) static int __close_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) if (session->s_state >= CEPH_MDS_SESSION_CLOSING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) session->s_state = CEPH_MDS_SESSION_CLOSING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) return request_close_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) static bool drop_negative_children(struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) struct dentry *child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) bool all_negative = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) if (!d_is_dir(dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) list_for_each_entry(child, &dentry->d_subdirs, d_child) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) if (d_really_is_positive(child)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) all_negative = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) if (all_negative)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) shrink_dcache_parent(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) return all_negative;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) * Trim old(er) caps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) * Because we can't cache an inode without one or more caps, we do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) * this indirectly: if a cap is unused, we prune its aliases, at which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) * point the inode will hopefully get dropped to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) * Yes, this is a bit sloppy. Our only real goal here is to respond to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) * memory pressure from the MDS, though, so it needn't be perfect.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) int *remaining = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) int used, wanted, oissued, mine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) if (*remaining <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) mine = cap->issued | cap->implemented;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) used = __ceph_caps_used(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) wanted = __ceph_caps_file_wanted(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) oissued = __ceph_caps_issued_other(ci, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) ceph_cap_string(used), ceph_cap_string(wanted));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) if (cap == ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) if (ci->i_dirty_caps || ci->i_flushing_caps ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) !list_empty(&ci->i_cap_snaps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) if ((used | wanted) & CEPH_CAP_ANY_WR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) /* Note: it's possible that i_filelock_ref becomes non-zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) * after dropping auth caps. It doesn't hurt because reply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) * of lock mds request will re-add auth caps. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) if (atomic_read(&ci->i_filelock_ref) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) /* The inode has cached pages, but it's no longer used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) * we can safely drop it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) if (S_ISREG(inode->i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) wanted == 0 && used == CEPH_CAP_FILE_CACHE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) !(oissued & CEPH_CAP_FILE_CACHE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) used = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) oissued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) if ((used | wanted) & ~oissued & mine)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) goto out; /* we need these caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) if (oissued) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) /* we aren't the only cap.. just remove us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) __ceph_remove_cap(cap, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) (*remaining)--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) struct dentry *dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) /* try dropping referring dentries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) dentry = d_find_any_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) if (dentry && drop_negative_children(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) int count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) d_prune_aliases(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) count = atomic_read(&inode->i_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) if (count == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) (*remaining)--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) dout("trim_caps_cb %p cap %p pruned, count now %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) inode, cap, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) * Trim session cap count down to some max number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) int ceph_trim_caps(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) int max_caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) int trim_caps = session->s_nr_caps - max_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) dout("trim_caps mds%d start: %d / %d, trim %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) session->s_mds, session->s_nr_caps, max_caps, trim_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) if (trim_caps > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) int remaining = trim_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) ceph_iterate_session_caps(session, trim_caps_cb, &remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) session->s_mds, session->s_nr_caps, max_caps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) trim_caps - remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) ceph_flush_cap_releases(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) static int check_caps_flush(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) u64 want_flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) int ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) if (!list_empty(&mdsc->cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) struct ceph_cap_flush *cf =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) list_first_entry(&mdsc->cap_flush_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) struct ceph_cap_flush, g_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) if (cf->tid <= want_flush_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) dout("check_caps_flush still flushing tid "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) "%llu <= %llu\n", cf->tid, want_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) * flush all dirty inode data to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) * returns true if we've flushed through want_flush_tid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) static void wait_caps_flush(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) u64 want_flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) dout("check_caps_flush want %llu\n", want_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) wait_event(mdsc->cap_flushing_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) check_caps_flush(mdsc, want_flush_tid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) * called under s_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) static void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) struct ceph_msg *msg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) struct ceph_mds_cap_release *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) struct ceph_mds_cap_item *item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) LIST_HEAD(tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) int num_cap_releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) __le32 barrier, *cap_barrier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) down_read(&osdc->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) barrier = cpu_to_le32(osdc->epoch_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) up_read(&osdc->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) list_splice_init(&session->s_cap_releases, &tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) num_cap_releases = session->s_num_cap_releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) session->s_num_cap_releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) while (!list_empty(&tmp_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) PAGE_SIZE, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) head->num = cpu_to_le32(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) msg->front.iov_len = sizeof(*head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) msg->hdr.version = cpu_to_le16(2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) msg->hdr.compat_version = cpu_to_le16(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) cap = list_first_entry(&tmp_list, struct ceph_cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) list_del(&cap->session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) num_cap_releases--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) put_unaligned_le32(get_unaligned_le32(&head->num) + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) &head->num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) item = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) item->ino = cpu_to_le64(cap->cap_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) item->cap_id = cpu_to_le64(cap->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) item->migrate_seq = cpu_to_le32(cap->mseq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) item->seq = cpu_to_le32(cap->issue_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) msg->front.iov_len += sizeof(*item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) ceph_put_cap(mdsc, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) // Append cap_barrier field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) cap_barrier = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) *cap_barrier = barrier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) msg->front.iov_len += sizeof(*cap_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) msg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) BUG_ON(num_cap_releases != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) if (!list_empty(&session->s_cap_releases))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) if (msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) // Append cap_barrier field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) cap_barrier = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) *cap_barrier = barrier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) msg->front.iov_len += sizeof(*cap_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) pr_err("send_cap_releases mds%d, failed to allocate message\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) list_splice(&tmp_list, &session->s_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) session->s_num_cap_releases += num_cap_releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) static void ceph_cap_release_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) struct ceph_mds_session *session =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) container_of(work, struct ceph_mds_session, s_cap_release_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) if (session->s_state == CEPH_MDS_SESSION_OPEN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) session->s_state == CEPH_MDS_SESSION_HUNG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) ceph_send_cap_releases(session->s_mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) if (mdsc->stopping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) ceph_get_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) if (queue_work(mdsc->fsc->cap_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) &session->s_cap_release_work)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) dout("cap release work queued\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) dout("failed to queue cap release work\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) * caller holds session->s_cap_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) void __ceph_queue_cap_release(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) struct ceph_cap *cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) list_add_tail(&cap->session_caps, &session->s_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) session->s_num_cap_releases++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) if (!(session->s_num_cap_releases % CEPH_CAPS_PER_RELEASE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) ceph_flush_cap_releases(session->s_mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) static void ceph_cap_reclaim_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) struct ceph_mds_client *mdsc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) container_of(work, struct ceph_mds_client, cap_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) int ret = ceph_trim_dentries(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) if (ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) ceph_queue_cap_reclaim_work(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) if (mdsc->stopping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_reclaim_work)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) dout("caps reclaim work queued\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) dout("failed to queue caps release work\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) int val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) if (!nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) if ((val % CEPH_CAPS_PER_RELEASE) < nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) atomic_set(&mdsc->cap_reclaim_pending, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) ceph_queue_cap_reclaim_work(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) * requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) struct inode *dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) struct ceph_inode_info *ci = ceph_inode(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) size_t size = sizeof(struct ceph_mds_reply_dir_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) unsigned int num_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) num_entries = ci->i_files + ci->i_subdirs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) num_entries = max(num_entries, 1U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) num_entries = min(num_entries, opt->max_readdir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) order = get_order(size * num_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) while (order >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) rinfo->dir_entries = (void*)__get_free_pages(GFP_KERNEL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) __GFP_NOWARN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) if (rinfo->dir_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) order--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) if (!rinfo->dir_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) num_entries = (PAGE_SIZE << order) / size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) num_entries = min(num_entries, opt->max_readdir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) rinfo->dir_buf_size = PAGE_SIZE << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) req->r_num_caps = num_entries + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) req->r_args.readdir.max_entries = cpu_to_le32(num_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) * Create an mds request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) struct ceph_mds_request *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) req = kmem_cache_zalloc(ceph_mds_request_cachep, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) if (!req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) mutex_init(&req->r_fill_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) req->r_mdsc = mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) req->r_started = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) req->r_start_latency = ktime_get();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) req->r_resend_mds = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) INIT_LIST_HEAD(&req->r_unsafe_dir_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) INIT_LIST_HEAD(&req->r_unsafe_target_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) req->r_fmode = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) kref_init(&req->r_kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) RB_CLEAR_NODE(&req->r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) INIT_LIST_HEAD(&req->r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) init_completion(&req->r_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) init_completion(&req->r_safe_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) INIT_LIST_HEAD(&req->r_unsafe_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) ktime_get_coarse_real_ts64(&req->r_stamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) req->r_op = op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) req->r_direct_mode = mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) * return oldest (lowest) request, tid in request tree, 0 if none.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) * called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) if (RB_EMPTY_ROOT(&mdsc->request_tree))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) return rb_entry(rb_first(&mdsc->request_tree),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) return mdsc->oldest_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) * Build a dentry's path. Allocate on heap; caller must kfree. Based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) * on build_path_from_dentry in fs/cifs/dir.c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) * If @stop_on_nosnap, generate path relative to the first non-snapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) * inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) * Encode hidden .snap dirs as a double /, i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) * foo/.snap/bar -> foo//bar
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) int stop_on_nosnap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) struct dentry *temp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) int pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) unsigned seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) u64 base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) if (!dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) path = __getname();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) pos = PATH_MAX - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) path[pos] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) seq = read_seqbegin(&rename_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) temp = dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) spin_lock(&temp->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) inode = d_inode(temp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) dout("build_path path+%d: %p SNAPDIR\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) pos, temp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) } else if (stop_on_nosnap && inode && dentry != temp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) ceph_snap(inode) == CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) spin_unlock(&temp->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) pos++; /* get rid of any prepended '/' */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) pos -= temp->d_name.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) if (pos < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) spin_unlock(&temp->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) memcpy(path + pos, temp->d_name.name, temp->d_name.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) spin_unlock(&temp->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) temp = READ_ONCE(temp->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) /* Are we at the root? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) if (IS_ROOT(temp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) /* Are we out of buffer? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) if (--pos < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) path[pos] = '/';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) base = ceph_ino(d_inode(temp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) if (read_seqretry(&rename_lock, seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) if (pos < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) * A rename didn't occur, but somehow we didn't end up where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) * we thought we would. Throw a warning and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) pr_warn("build_path did not end path lookup where "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) "expected, pos is %d\n", pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) *pbase = base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) *plen = PATH_MAX - 1 - pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) dout("build_path on %p %d built %llx '%.*s'\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) dentry, d_count(dentry), base, *plen, path + pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) return path + pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) static int build_dentry_path(struct dentry *dentry, struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) const char **ppath, int *ppathlen, u64 *pino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) bool *pfreepath, bool parent_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) if (!dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) dir = d_inode_rcu(dentry->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) *pino = ceph_ino(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) *ppath = dentry->d_name.name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) *ppathlen = dentry->d_name.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) if (IS_ERR(path))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) return PTR_ERR(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) *ppath = path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) *pfreepath = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) static int build_inode_path(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) const char **ppath, int *ppathlen, u64 *pino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) bool *pfreepath)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) struct dentry *dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) if (ceph_snap(inode) == CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) *pino = ceph_ino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) *ppathlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) dentry = d_find_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) if (IS_ERR(path))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) return PTR_ERR(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) *ppath = path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) *pfreepath = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) * request arguments may be specified via an inode *, a dentry *, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) * an explicit ino+path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) struct inode *rdiri, const char *rpath,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) u64 rino, const char **ppath, int *pathlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) u64 *ino, bool *freepath, bool parent_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) int r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) if (rinode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) r = build_inode_path(rinode, ppath, pathlen, ino, freepath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) ceph_snap(rinode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) } else if (rdentry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) freepath, parent_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) *ppath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) } else if (rpath || rino) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) *ino = rino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) *ppath = rpath;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) *pathlen = rpath ? strlen(rpath) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) dout(" path %.*s\n", *pathlen, rpath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) int mds, bool drop_cap_releases)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) struct ceph_mds_request_head *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) const char *path1 = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) const char *path2 = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) u64 ino1 = 0, ino2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) int pathlen1 = 0, pathlen2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) bool freepath1 = false, freepath2 = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) u16 releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) ret = set_request_path_attr(req->r_inode, req->r_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) req->r_parent, req->r_path1, req->r_ino1.ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) &path1, &pathlen1, &ino1, &freepath1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) test_bit(CEPH_MDS_R_PARENT_LOCKED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) &req->r_req_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) msg = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) /* If r_old_dentry is set, then assume that its parent is locked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) ret = set_request_path_attr(NULL, req->r_old_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) req->r_old_dentry_dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) req->r_path2, req->r_ino2.ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) &path2, &pathlen2, &ino2, &freepath2, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) msg = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) goto out_free1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) len = sizeof(*head) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) sizeof(struct ceph_timespec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) /* calculate (max) length for cap releases */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) len += sizeof(struct ceph_mds_request_release) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) (!!req->r_inode_drop + !!req->r_dentry_drop +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) if (req->r_dentry_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) len += pathlen1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) if (req->r_old_dentry_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) len += pathlen2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) msg = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) goto out_free2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) msg->hdr.version = cpu_to_le16(2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) msg->hdr.tid = cpu_to_le64(req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) p = msg->front.iov_base + sizeof(*head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) end = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) head->op = cpu_to_le32(req->r_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) head->ino = cpu_to_le64(req->r_deleg_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) head->args = req->r_args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) ceph_encode_filepath(&p, end, ino1, path1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) ceph_encode_filepath(&p, end, ino2, path2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) /* make note of release offset, in case we need to replay */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) req->r_request_release_offset = p - msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) /* cap releases */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) if (req->r_inode_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) releases += ceph_encode_inode_release(&p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) req->r_inode ? req->r_inode : d_inode(req->r_dentry),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) mds, req->r_inode_drop, req->r_inode_unless,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) req->r_op == CEPH_MDS_OP_READDIR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) if (req->r_dentry_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) releases += ceph_encode_dentry_release(&p, req->r_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) req->r_parent, mds, req->r_dentry_drop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) req->r_dentry_unless);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) if (req->r_old_dentry_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) req->r_old_dentry_dir, mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) req->r_old_dentry_drop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) req->r_old_dentry_unless);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) if (req->r_old_inode_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) releases += ceph_encode_inode_release(&p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) d_inode(req->r_old_dentry),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) if (drop_cap_releases) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) p = msg->front.iov_base + req->r_request_release_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) head->num_releases = cpu_to_le16(releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) /* time stamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) struct ceph_timespec ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) ceph_encode_timespec64(&ts, &req->r_stamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) ceph_encode_copy(&p, &ts, sizeof(ts));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) if (WARN_ON_ONCE(p > end)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) ceph_msg_put(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) msg = ERR_PTR(-ERANGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) goto out_free2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) msg->front.iov_len = p - msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) if (req->r_pagelist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) struct ceph_pagelist *pagelist = req->r_pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) ceph_msg_data_add_pagelist(msg, pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) msg->hdr.data_len = cpu_to_le32(pagelist->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) msg->hdr.data_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) msg->hdr.data_off = cpu_to_le16(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) out_free2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) if (freepath2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) ceph_mdsc_free_path((char *)path2, pathlen2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) out_free1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) if (freepath1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) ceph_mdsc_free_path((char *)path1, pathlen1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) return msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) * called under mdsc->mutex if error, under no mutex if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) * success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) static void complete_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) req->r_end_latency = ktime_get();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) if (req->r_callback)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) req->r_callback(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) complete_all(&req->r_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) static int __prepare_send_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) int mds, bool drop_cap_releases)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) struct ceph_mds_request_head *rhead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) int flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) req->r_attempts++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) if (req->r_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) struct ceph_cap *cap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) if (cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) req->r_sent_on_mseq = cap->mseq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) req->r_sent_on_mseq = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) * Replay. Do not regenerate message (and rebuild
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) * paths, etc.); just use the original message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) * Rebuilding paths will break for renames because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) * d_move mangles the src name.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) msg = req->r_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) rhead = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) flags = le32_to_cpu(rhead->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) flags |= CEPH_MDS_FLAG_REPLAY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) rhead->flags = cpu_to_le32(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) if (req->r_target_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) rhead->num_retry = req->r_attempts - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) /* remove cap/dentry releases from message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) rhead->num_releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) /* time stamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) p = msg->front.iov_base + req->r_request_release_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) struct ceph_timespec ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) ceph_encode_timespec64(&ts, &req->r_stamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) ceph_encode_copy(&p, &ts, sizeof(ts));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) msg->front.iov_len = p - msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) if (req->r_request) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) ceph_msg_put(req->r_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) req->r_request = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) msg = create_request_message(mdsc, req, mds, drop_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) if (IS_ERR(msg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) req->r_err = PTR_ERR(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) return PTR_ERR(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) req->r_request = msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) rhead = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) flags |= CEPH_MDS_FLAG_REPLAY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) flags |= CEPH_MDS_FLAG_ASYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) if (req->r_parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) flags |= CEPH_MDS_FLAG_WANT_DENTRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) rhead->flags = cpu_to_le32(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) rhead->num_fwd = req->r_num_fwd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) rhead->num_retry = req->r_attempts - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) dout(" r_parent = %p\n", req->r_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) static int __send_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) bool drop_cap_releases)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) err = __prepare_send_request(mdsc, req, session->s_mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) drop_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) if (!err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) ceph_msg_get(req->r_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) ceph_con_send(&session->s_con, req->r_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776)
/*
 * Send request to an appropriate mds, or put it on the appropriate
 * wait list if no mds/session is usable yet.
 */
static void __do_request(struct ceph_mds_client *mdsc,
			 struct ceph_mds_request *req)
{
	struct ceph_mds_session *session = NULL;
	int mds = -1;
	int err = 0;
	bool random;

	/* already failed or answered: nothing to send */
	if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
		if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
			__unregister_request(mdsc, req);
		return;
	}

	/* r_timeout bounds the whole retry effort, measured from r_started */
	if (req->r_timeout &&
	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
		dout("do_request timed out\n");
		err = -ETIMEDOUT;
		goto finish;
	}
	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
		dout("do_request forced umount\n");
		err = -EIO;
		goto finish;
	}
	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
		if (mdsc->mdsmap_err) {
			err = mdsc->mdsmap_err;
			dout("do_request mdsmap err %d\n", err);
			goto finish;
		}
		if (mdsc->mdsmap->m_epoch == 0) {
			/* no mdsmap yet; park until a map arrives */
			dout("do_request no mdsmap, waiting for map\n");
			list_add(&req->r_wait, &mdsc->waiting_for_map);
			return;
		}
		/* without -o mount_wait, fail fast if the cluster is down */
		if (!(mdsc->fsc->mount_options->flags &
		      CEPH_MOUNT_OPT_MOUNTWAIT) &&
		    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
			err = -EHOSTUNREACH;
			goto finish;
		}
	}

	/* drop any session ref from a previous attempt before re-picking */
	put_request_session(req);

	mds = __choose_mds(mdsc, req, &random);
	if (mds < 0 ||
	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
		if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) {
			/* async requests can't wait; caller retries sync */
			err = -EJUKEBOX;
			goto finish;
		}
		dout("do_request no mds or not active, waiting for map\n");
		list_add(&req->r_wait, &mdsc->waiting_for_map);
		return;
	}

	/* get, open session */
	session = __ceph_lookup_mds_session(mdsc, mds);
	if (!session) {
		session = register_session(mdsc, mds);
		if (IS_ERR(session)) {
			err = PTR_ERR(session);
			goto finish;
		}
	}
	/* request holds its own session ref; dropped on teardown */
	req->r_session = ceph_get_mds_session(session);

	dout("do_request mds%d session %p state %s\n", mds, session,
	     ceph_session_state_name(session->s_state));
	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
	    session->s_state != CEPH_MDS_SESSION_HUNG) {
		if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
			err = -EACCES;
			goto out_session;
		}
		/*
		 * We cannot queue async requests since the caps and delegated
		 * inodes are bound to the session. Just return -EJUKEBOX and
		 * let the caller retry a sync request in that case.
		 */
		if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) {
			err = -EJUKEBOX;
			goto out_session;
		}
		if (session->s_state == CEPH_MDS_SESSION_NEW ||
		    session->s_state == CEPH_MDS_SESSION_CLOSING) {
			err = __open_session(mdsc, session);
			if (err)
				goto out_session;
			/* retry the same mds later */
			if (random)
				req->r_resend_mds = mds;
		}
		/* session not ready; park on it and retry when it opens */
		list_add(&req->r_wait, &session->s_waiting);
		goto out_session;
	}

	/* send request */
	req->r_resend_mds = -1;   /* forget any previous mds hint */

	if (req->r_request_started == 0)   /* note request start time */
		req->r_request_started = jiffies;

	err = __send_request(mdsc, session, req, false);

out_session:
	ceph_put_mds_session(session);
finish:
	if (err) {
		/* early failure: record it and complete the request now */
		dout("__do_request early error %d\n", err);
		req->r_err = err;
		complete_request(mdsc, req);
		__unregister_request(mdsc, req);
	}
	return;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) static void __wake_requests(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) struct list_head *head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) LIST_HEAD(tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) list_splice_init(head, &tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) while (!list_empty(&tmp_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) req = list_entry(tmp_list.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) struct ceph_mds_request, r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) list_del_init(&req->r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) dout(" wake request %p tid %llu\n", req, req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) __do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) * Wake up threads with requests pending for @mds, so that they can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) * resubmit their requests to a possibly different mds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) static void kick_requests(struct ceph_mds_client *mdsc, int mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) struct rb_node *p = rb_first(&mdsc->request_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) dout("kick_requests mds%d\n", mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) req = rb_entry(p, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) if (req->r_attempts > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) continue; /* only new requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) if (req->r_session &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) req->r_session->s_mds == mds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) dout(" kicking tid %llu\n", req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) list_del_init(&req->r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) __do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944)
/*
 * Pin the inodes a request references, wait out any async creates it
 * depends on, then register the request and attempt to send it.
 *
 * Returns 0 if submitted (or parked on a wait list), or a negative
 * error if the request failed early.
 */
int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir,
			     struct ceph_mds_request *req)
{
	int err = 0;

	/* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
	if (req->r_inode)
		ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
	if (req->r_parent) {
		struct ceph_inode_info *ci = ceph_inode(req->r_parent);
		/* WRITE ops touch the dir, so track WR fmode on the parent */
		int fmode = (req->r_op & CEPH_MDS_OP_WRITE) ?
			    CEPH_FILE_MODE_WR : CEPH_FILE_MODE_RD;
		spin_lock(&ci->i_ceph_lock);
		ceph_take_cap_refs(ci, CEPH_CAP_PIN, false);
		/* presumably refreshes the parent's fmode use so its caps
		 * are kept — TODO confirm against __ceph_touch_fmode */
		__ceph_touch_fmode(ci, mdsc, fmode);
		spin_unlock(&ci->i_ceph_lock);
		ihold(req->r_parent);
	}
	if (req->r_old_dentry_dir)
		ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
				  CEPH_CAP_PIN);

	/*
	 * Any in-flight async create for an inode this request names must
	 * finish before the MDS sees this request.
	 * NOTE(review): on early return the refs taken above appear to be
	 * released when the caller puts the request — confirm against
	 * request teardown.
	 */
	if (req->r_inode) {
		err = ceph_wait_on_async_create(req->r_inode);
		if (err) {
			dout("%s: wait for async create returned: %d\n",
			     __func__, err);
			return err;
		}
	}

	if (!err && req->r_old_inode) {
		err = ceph_wait_on_async_create(req->r_old_inode);
		if (err) {
			dout("%s: wait for async create returned: %d\n",
			     __func__, err);
			return err;
		}
	}

	dout("submit_request on %p for inode %p\n", req, dir);
	mutex_lock(&mdsc->mutex);
	__register_request(mdsc, req, dir);
	__do_request(mdsc, req);
	/* r_err is set only on early failure inside __do_request */
	err = req->r_err;
	mutex_unlock(&mdsc->mutex);
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993)
/*
 * Wait for a submitted request to complete, be killed, or time out,
 * and return the final result.  On interruption/timeout the request
 * is marked aborted (unless a real reply raced in first).
 */
static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
				  struct ceph_mds_request *req)
{
	int err;

	/* wait */
	dout("do_request waiting\n");
	if (!req->r_timeout && req->r_wait_for_completion) {
		/* caller-supplied wait routine (used only without a timeout) */
		err = req->r_wait_for_completion(mdsc, req);
	} else {
		long timeleft = wait_for_completion_killable_timeout(
					&req->r_completion,
					ceph_timeout_jiffies(req->r_timeout));
		if (timeleft > 0)
			err = 0;
		else if (!timeleft)
			err = -ETIMEDOUT;  /* timed out */
		else
			err = timeleft;  /* killed */
	}
	dout("do_request waited, got %d\n", err);
	mutex_lock(&mdsc->mutex);

	/* only abort if we didn't race with a real reply */
	if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
		/* a reply arrived: its result wins over any wait error */
		err = le32_to_cpu(req->r_reply_info.head->result);
	} else if (err < 0) {
		dout("aborted request %lld with %d\n", req->r_tid, err);

		/*
		 * ensure we aren't running concurrently with
		 * ceph_fill_trace or ceph_readdir_prepopulate, which
		 * rely on locks (dir mutex) held by our caller.
		 */
		mutex_lock(&req->r_fill_mutex);
		req->r_err = err;
		set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
		mutex_unlock(&req->r_fill_mutex);

		/* an aborted namespace op may have left stale dir state */
		if (req->r_parent &&
		    (req->r_op & CEPH_MDS_OP_WRITE))
			ceph_invalidate_dir_request(req);
	} else {
		/* clean wakeup without a result: report any recorded error */
		err = req->r_err;
	}

	mutex_unlock(&mdsc->mutex);
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043)
/*
 * Synchronously perform an mds request.  Take care of all of the
 * session setup, forwarding, retry details.
 */
int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
			 struct inode *dir,
			 struct ceph_mds_request *req)
{
	int err;

	dout("do_request on %p\n", req);

	/* issue */
	err = ceph_mdsc_submit_request(mdsc, dir, req);
	if (err)
		goto out;

	/* submission succeeded; block until the request resolves */
	err = ceph_mdsc_wait_request(mdsc, req);
out:
	dout("do_request %p done, result %d\n", req, err);
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) * Invalidate dir's completeness, dentry lease state on an aborted MDS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) * namespace request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) void ceph_invalidate_dir_request(struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) struct inode *dir = req->r_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) struct inode *old_dir = req->r_old_dentry_dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) ceph_dir_clear_complete(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) if (old_dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) ceph_dir_clear_complete(old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) if (req->r_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) ceph_invalidate_dentry_lease(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) if (req->r_old_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) ceph_invalidate_dentry_lease(req->r_old_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085) * Handle mds reply.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087) * We take the session mutex and parse and process the reply immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088) * This preserves the logical ordering of replies, capabilities, etc., sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089) * by the MDS as they are applied to our local cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) struct ceph_mds_client *mdsc = session->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) struct ceph_mds_reply_head *head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) struct ceph_mds_reply_info_parsed *rinfo; /* parsed reply info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) struct ceph_snap_realm *realm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) u64 tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) int err, result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) if (msg->front.iov_len < sizeof(*head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) pr_err("mdsc_handle_reply got corrupt (short) reply\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) /* get request, session */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) tid = le64_to_cpu(msg->hdr.tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) req = lookup_get_request(mdsc, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) if (!req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) dout("handle_reply on unknown tid %llu\n", tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) dout("handle_reply %p\n", req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) /* correct session? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) if (req->r_session != session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) pr_err("mdsc_handle_reply got %llu on session mds%d"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) " not mds%d\n", tid, session->s_mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) req->r_session ? req->r_session->s_mds : -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) /* dup? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) pr_warn("got a dup %s reply on %llu from mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) head->safe ? "safe" : "unsafe", tid, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) pr_warn("got unsafe after safe on %llu from mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) tid, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) result = le32_to_cpu(head->result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) * Handle an ESTALE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) * if we're not talking to the authority, send to them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) * if the authority has changed while we weren't looking,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) * send to new authority
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) * Otherwise we just have to return an ESTALE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) if (result == -ESTALE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) dout("got ESTALE on request %llu\n", req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) req->r_resend_mds = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) if (req->r_direct_mode != USE_AUTH_MDS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) dout("not using auth, setting for that now\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) req->r_direct_mode = USE_AUTH_MDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) __do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) int mds = __choose_mds(mdsc, req, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) if (mds >= 0 && mds != req->r_session->s_mds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) dout("but auth changed, so resending\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) __do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) dout("have to return ESTALE on request %llu\n", req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) if (head->safe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) __unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) /* last request during umount? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) if (mdsc->stopping && !__get_oldest_req(mdsc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) complete_all(&mdsc->safe_umount_waiters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) * We already handled the unsafe response, now do the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) * cleanup. No need to examine the response; the MDS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) * doesn't include any result info in the safe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) * response. And even if it did, there is nothing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) * useful we could do with a revised return value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) dout("got safe reply %llu, mds%d\n", tid, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) dout("handle_reply tid %lld result %d\n", tid, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) rinfo = &req->r_reply_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) err = parse_reply_info(session, msg, rinfo, (u64)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) /* snap trace */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) realm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) if (rinfo->snapblob_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) down_write(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) ceph_update_snap_trace(mdsc, rinfo->snapblob,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) rinfo->snapblob + rinfo->snapblob_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) &realm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) downgrade_write(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) down_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) /* insert trace into our cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) mutex_lock(&req->r_fill_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) current->journal_info = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) err = ceph_fill_trace(mdsc->fsc->sb, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) if (err == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) req->r_op == CEPH_MDS_OP_LSSNAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) ceph_readdir_prepopulate(req, req->r_session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) current->journal_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) mutex_unlock(&req->r_fill_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) if (realm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) ceph_put_snap_realm(mdsc, realm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) if (err == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) if (req->r_target_inode &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) struct ceph_inode_info *ci =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) ceph_inode(req->r_target_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) list_add_tail(&req->r_unsafe_target_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) &ci->i_unsafe_iops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) req->r_err = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) req->r_reply = ceph_msg_get(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) dout("reply arrived after request %lld was aborted\n", tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) /* kick calling process */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) complete_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) ceph_update_metadata_latency(&mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) req->r_end_latency, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286) * handle mds notification that our request has been forwarded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) static void handle_forward(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) u64 tid = le64_to_cpu(msg->hdr.tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) u32 next_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) u32 fwd_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) int err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) void *p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) void *end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) ceph_decode_need(&p, end, 2*sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) next_mds = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) fwd_seq = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) req = lookup_get_request(mdsc, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) if (!req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) goto out; /* dup reply? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) dout("forward tid %llu aborted, unregistering\n", tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) __unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) } else if (fwd_seq <= req->r_num_fwd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) dout("forward tid %llu to mds%d - old seq %d <= %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) tid, next_mds, req->r_num_fwd, fwd_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) /* resend. forward race not possible; mds would drop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) BUG_ON(req->r_err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) req->r_attempts = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) req->r_num_fwd = fwd_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) req->r_resend_mds = next_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) put_request_session(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) __do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) pr_err("mdsc_handle_forward decode error err=%d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) static int __decode_session_metadata(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) bool *blocklisted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) /* map<string,string> */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) bool err_str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) ceph_decode_32_safe(p, end, n, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) while (n-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) ceph_decode_32_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) ceph_decode_need(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) err_str = !strncmp(*p, "error_string", len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) ceph_decode_32_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) ceph_decode_need(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) * Match "blocklisted (blacklisted)" from newer MDSes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) * or "blacklisted" from older MDSes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) if (err_str && strnstr(*p, "blacklisted", len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) *blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366) * handle a mds session control message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) static void handle_session(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) struct ceph_mds_client *mdsc = session->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) int msg_version = le16_to_cpu(msg->hdr.version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) void *p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) void *end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) struct ceph_mds_session_head *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) u32 op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) u64 seq, features = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) int wake = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) bool blocklisted = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) /* decode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) ceph_decode_need(&p, end, sizeof(*h), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) h = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) p += sizeof(*h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) op = le32_to_cpu(h->op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) seq = le64_to_cpu(h->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) if (msg_version >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) /* version >= 2, metadata */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) if (__decode_session_metadata(&p, end, &blocklisted) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) /* version >= 3, feature bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) ceph_decode_32_safe(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) if (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) ceph_decode_64_safe(&p, end, features, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) p += len - sizeof(features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) if (op == CEPH_SESSION_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) ceph_get_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) __unregister_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) /* FIXME: this ttl calculation is generous */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) dout("handle_session mds%d %s %p state %s seq %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) mds, ceph_session_op_name(op), session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) ceph_session_state_name(session->s_state), seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) if (session->s_state == CEPH_MDS_SESSION_HUNG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) session->s_state = CEPH_MDS_SESSION_OPEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) pr_info("mds%d came back\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) switch (op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) case CEPH_SESSION_OPEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) pr_info("mds%d reconnect success\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) session->s_state = CEPH_MDS_SESSION_OPEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) session->s_features = features;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) renewed_caps(mdsc, session, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) metric_schedule_delayed(&mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) wake = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) if (mdsc->stopping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) __close_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) case CEPH_SESSION_RENEWCAPS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) if (session->s_renew_seq == seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) renewed_caps(mdsc, session, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) case CEPH_SESSION_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) pr_info("mds%d reconnect denied\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) session->s_state = CEPH_MDS_SESSION_CLOSED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) cleanup_session_requests(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) remove_session_caps(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) wake = 2; /* for good measure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) wake_up_all(&mdsc->session_close_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) case CEPH_SESSION_STALE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) pr_info("mds%d caps went stale, renewing\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) spin_lock(&session->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) session->s_cap_gen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) session->s_cap_ttl = jiffies - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) spin_unlock(&session->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) send_renew_caps(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) case CEPH_SESSION_RECALL_STATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) case CEPH_SESSION_FLUSHMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) send_flushmsg_ack(mdsc, session, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) case CEPH_SESSION_FORCE_RO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) dout("force_session_readonly %p\n", session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) session->s_readonly = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) wake_up_session_caps(session, FORCE_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) case CEPH_SESSION_REJECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) pr_info("mds%d rejected session\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) session->s_state = CEPH_MDS_SESSION_REJECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) cleanup_session_requests(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) remove_session_caps(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) if (blocklisted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) mdsc->fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) wake = 2; /* for good measure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) if (wake) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) __wake_requests(mdsc, &session->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) if (wake == 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) kick_requests(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) if (op == CEPH_SESSION_CLOSE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) (int)msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) int dcaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) dcaps = xchg(&req->r_dir_caps, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) if (dcaps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) int dcaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) dcaps = xchg(&req->r_dir_caps, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) if (dcaps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) dcaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537) * called under session->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) struct ceph_mds_request *req, *nreq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) dout("replay_unsafe_requests mds%d\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) __send_request(mdsc, session, req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) * also re-send old requests when MDS enters reconnect stage. So that MDS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) * can process completed request in clientreplay stage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) p = rb_first(&mdsc->request_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) req = rb_entry(p, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) if (req->r_attempts == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) continue; /* only old requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) if (!req->r_session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) if (req->r_session->s_mds != session->s_mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) ceph_mdsc_release_dir_caps_no_check(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) __send_request(mdsc, session, req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3575) static int send_reconnect_partial(struct ceph_reconnect_state *recon_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3576) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3577) struct ceph_msg *reply;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3578) struct ceph_pagelist *_pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3579) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3580) __le32 *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3581) int err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3583) if (!recon_state->allow_multi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3584) return -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3585)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3586) /* can't handle message that contains both caps and realm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3587) BUG_ON(!recon_state->nr_caps == !recon_state->nr_realms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3589) /* pre-allocate new pagelist */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3590) _pagelist = ceph_pagelist_alloc(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3591) if (!_pagelist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3592) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3594) reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3595) if (!reply)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3596) goto fail_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3598) /* placeholder for nr_caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3599) err = ceph_pagelist_encode_32(_pagelist, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3600) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3601) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3603) if (recon_state->nr_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3604) /* currently encoding caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3605) err = ceph_pagelist_encode_32(recon_state->pagelist, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3606) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3607) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3608) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3609) /* placeholder for nr_realms (currently encoding relams) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3610) err = ceph_pagelist_encode_32(_pagelist, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3611) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3612) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3613) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3614)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3615) err = ceph_pagelist_encode_8(recon_state->pagelist, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3616) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3617) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3619) page = list_first_entry(&recon_state->pagelist->head, struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3620) addr = kmap_atomic(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3621) if (recon_state->nr_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3622) /* currently encoding caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3623) *addr = cpu_to_le32(recon_state->nr_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3624) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3625) /* currently encoding relams */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3626) *(addr + 1) = cpu_to_le32(recon_state->nr_realms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3628) kunmap_atomic(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3630) reply->hdr.version = cpu_to_le16(5);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3631) reply->hdr.compat_version = cpu_to_le16(4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3633) reply->hdr.data_len = cpu_to_le32(recon_state->pagelist->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3634) ceph_msg_data_add_pagelist(reply, recon_state->pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3636) ceph_con_send(&recon_state->session->s_con, reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3637) ceph_pagelist_release(recon_state->pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3639) recon_state->pagelist = _pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3640) recon_state->nr_caps = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3641) recon_state->nr_realms = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3642) recon_state->msg_version = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3643) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3644) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3645) ceph_msg_put(reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3646) fail_msg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3647) ceph_pagelist_release(_pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3648) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3649) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3651) static struct dentry* d_find_primary(struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3652) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3653) struct dentry *alias, *dn = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3654)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3655) if (hlist_empty(&inode->i_dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3656) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3658) spin_lock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3659) if (hlist_empty(&inode->i_dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3660) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3662) if (S_ISDIR(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3663) alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3664) if (!IS_ROOT(alias))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3665) dn = dget(alias);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3666) goto out_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3669) hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3670) spin_lock(&alias->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3671) if (!d_unhashed(alias) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3672) (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3673) dn = dget_dlock(alias);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3675) spin_unlock(&alias->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3676) if (dn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3677) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3678) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3679) out_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3680) spin_unlock(&inode->i_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3681) return dn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3684) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3685) * Encode information about a cap for a reconnect with the MDS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3686) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3687) static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3688) void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3689) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3690) union {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3691) struct ceph_mds_cap_reconnect v2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3692) struct ceph_mds_cap_reconnect_v1 v1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3693) } rec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3694) struct ceph_inode_info *ci = cap->ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3695) struct ceph_reconnect_state *recon_state = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3696) struct ceph_pagelist *pagelist = recon_state->pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3697) struct dentry *dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3698) char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3699) int pathlen = 0, err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3700) u64 pathbase;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3701) u64 snap_follows;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3703) dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3704) inode, ceph_vinop(inode), cap, cap->cap_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3705) ceph_cap_string(cap->issued));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3707) dentry = d_find_primary(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3708) if (dentry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3709) /* set pathbase to parent dir when msg_version >= 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3710) path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3711) recon_state->msg_version >= 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3712) dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3713) if (IS_ERR(path)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3714) err = PTR_ERR(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3715) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3716) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3717) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3718) path = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3719) pathbase = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3722) spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3723) cap->seq = 0; /* reset cap seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3724) cap->issue_seq = 0; /* and issue_seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3725) cap->mseq = 0; /* and migrate_seq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3726) cap->cap_gen = cap->session->s_cap_gen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3727)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3728) /* These are lost when the session goes away */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3729) if (S_ISDIR(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3730) if (cap->issued & CEPH_CAP_DIR_CREATE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3731) ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3732) memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3733) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3734) cap->issued &= ~CEPH_CAP_ANY_DIR_OPS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3735) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3737) if (recon_state->msg_version >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3738) rec.v2.cap_id = cpu_to_le64(cap->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3739) rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3740) rec.v2.issued = cpu_to_le32(cap->issued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3741) rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3742) rec.v2.pathbase = cpu_to_le64(pathbase);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3743) rec.v2.flock_len = (__force __le32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3744) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3745) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3746) rec.v1.cap_id = cpu_to_le64(cap->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3747) rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3748) rec.v1.issued = cpu_to_le32(cap->issued);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3749) rec.v1.size = cpu_to_le64(inode->i_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3750) ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3751) ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3752) rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3753) rec.v1.pathbase = cpu_to_le64(pathbase);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3754) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3756) if (list_empty(&ci->i_cap_snaps)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3757) snap_follows = ci->i_head_snapc ? ci->i_head_snapc->seq : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3758) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3759) struct ceph_cap_snap *capsnap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3760) list_first_entry(&ci->i_cap_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3761) struct ceph_cap_snap, ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3762) snap_follows = capsnap->follows;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3763) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3764) spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3765)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3766) if (recon_state->msg_version >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3767) int num_fcntl_locks, num_flock_locks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3768) struct ceph_filelock *flocks = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3769) size_t struct_len, total_len = sizeof(u64);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3770) u8 struct_v = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3771)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3772) encode_again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3773) if (rec.v2.flock_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3774) ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3775) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3776) num_fcntl_locks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3777) num_flock_locks = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3778) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3779) if (num_fcntl_locks + num_flock_locks > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3780) flocks = kmalloc_array(num_fcntl_locks + num_flock_locks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3781) sizeof(struct ceph_filelock),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3782) GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3783) if (!flocks) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3784) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3785) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3786) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3787) err = ceph_encode_locks_to_buffer(inode, flocks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3788) num_fcntl_locks,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3789) num_flock_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3790) if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3791) kfree(flocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3792) flocks = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3793) if (err == -ENOSPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3794) goto encode_again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3795) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3797) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3798) kfree(flocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3799) flocks = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3802) if (recon_state->msg_version >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3803) /* version, compat_version and struct_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3804) total_len += 2 * sizeof(u8) + sizeof(u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3805) struct_v = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3807) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3808) * number of encoded locks is stable, so copy to pagelist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3809) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3810) struct_len = 2 * sizeof(u32) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3811) (num_fcntl_locks + num_flock_locks) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3812) sizeof(struct ceph_filelock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3813) rec.v2.flock_len = cpu_to_le32(struct_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3815) struct_len += sizeof(u32) + pathlen + sizeof(rec.v2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3816)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3817) if (struct_v >= 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3818) struct_len += sizeof(u64); /* snap_follows */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3820) total_len += struct_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3822) if (pagelist->length + total_len > RECONNECT_MAX_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3823) err = send_reconnect_partial(recon_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3824) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3825) goto out_freeflocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3826) pagelist = recon_state->pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3827) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3829) err = ceph_pagelist_reserve(pagelist, total_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3830) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3831) goto out_freeflocks;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3833) ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3834) if (recon_state->msg_version >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3835) ceph_pagelist_encode_8(pagelist, struct_v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3836) ceph_pagelist_encode_8(pagelist, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3837) ceph_pagelist_encode_32(pagelist, struct_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3838) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3839) ceph_pagelist_encode_string(pagelist, path, pathlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3840) ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3841) ceph_locks_to_pagelist(flocks, pagelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3842) num_fcntl_locks, num_flock_locks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3843) if (struct_v >= 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3844) ceph_pagelist_encode_64(pagelist, snap_follows);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3845) out_freeflocks:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3846) kfree(flocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3847) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3848) err = ceph_pagelist_reserve(pagelist,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3849) sizeof(u64) + sizeof(u32) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3850) pathlen + sizeof(rec.v1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3851) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3852) goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3853)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3854) ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3855) ceph_pagelist_encode_string(pagelist, path, pathlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3856) ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3859) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3860) ceph_mdsc_free_path(path, pathlen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3861) if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3862) recon_state->nr_caps++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3863) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3866) static int encode_snap_realms(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3867) struct ceph_reconnect_state *recon_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3868) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3869) struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3870) struct ceph_pagelist *pagelist = recon_state->pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3871) int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3873) if (recon_state->msg_version >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3874) err = ceph_pagelist_encode_32(pagelist, mdsc->num_snap_realms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3875) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3876) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3877) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3879) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3880) * snaprealms. we provide mds with the ino, seq (version), and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3881) * parent for all of our realms. If the mds has any newer info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3882) * it will tell us.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3883) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3884) for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3885) struct ceph_snap_realm *realm =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3886) rb_entry(p, struct ceph_snap_realm, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3887) struct ceph_mds_snaprealm_reconnect sr_rec;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3888)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3889) if (recon_state->msg_version >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3890) size_t need = sizeof(u8) * 2 + sizeof(u32) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3891) sizeof(sr_rec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3893) if (pagelist->length + need > RECONNECT_MAX_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3894) err = send_reconnect_partial(recon_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3895) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3896) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3897) pagelist = recon_state->pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3898) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3899)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3900) err = ceph_pagelist_reserve(pagelist, need);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3901) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3902) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3904) ceph_pagelist_encode_8(pagelist, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3905) ceph_pagelist_encode_8(pagelist, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3906) ceph_pagelist_encode_32(pagelist, sizeof(sr_rec));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3907) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3909) dout(" adding snap realm %llx seq %lld parent %llx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3910) realm->ino, realm->seq, realm->parent_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3911) sr_rec.ino = cpu_to_le64(realm->ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3912) sr_rec.seq = cpu_to_le64(realm->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3913) sr_rec.parent = cpu_to_le64(realm->parent_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3915) err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3916) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3917) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3919) recon_state->nr_realms++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3921) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3922) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927) * If an MDS fails and recovers, clients need to reconnect in order to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928) * reestablish shared state. This includes all caps issued through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929) * this session _and_ the snap_realm hierarchy. Because it's not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930) * clear which snap realms the mds cares about, we send everything we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931) * know about.. that ensures we'll then get any new info the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932) * recovering MDS might have.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934) * This is a relatively heavyweight operation, but it's rare.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) static void send_mds_reconnect(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) struct ceph_msg *reply;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) int err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) struct ceph_reconnect_state recon_state = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) .session = session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) LIST_HEAD(dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) pr_info("mds%d reconnect start\n", mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) recon_state.pagelist = ceph_pagelist_alloc(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) if (!recon_state.pagelist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) goto fail_nopagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) if (!reply)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) goto fail_nomsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) xa_destroy(&session->s_delegated_inos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) session->s_state = CEPH_MDS_SESSION_RECONNECTING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) session->s_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) dout("session %p state %s\n", session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) ceph_session_state_name(session->s_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) spin_lock(&session->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) session->s_cap_gen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) spin_unlock(&session->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) /* don't know if session is readonly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) session->s_readonly = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) * notify __ceph_remove_cap() that we are composing cap reconnect.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) * If a cap get released before being added to the cap reconnect,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) * __ceph_remove_cap() should skip queuing cap release.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) session->s_cap_reconnect = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) /* drop old cap expires; we're about to reestablish that state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) detach_cap_releases(session, &dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) dispose_cap_releases(mdsc, &dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) /* trim unused caps to reduce MDS's cache rejoin time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) if (mdsc->fsc->sb->s_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) shrink_dcache_parent(mdsc->fsc->sb->s_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) ceph_con_close(&session->s_con);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) ceph_con_open(&session->s_con,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) CEPH_ENTITY_TYPE_MDS, mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) /* replay unsafe requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) replay_unsafe_requests(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) ceph_early_kick_flushing_caps(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) down_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) /* placeholder for nr_caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) err = ceph_pagelist_encode_32(recon_state.pagelist, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) recon_state.msg_version = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) recon_state.allow_multi = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) } else if (session->s_con.peer_features & CEPH_FEATURE_MDSENC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) recon_state.msg_version = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) recon_state.msg_version = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) /* trsaverse this session's caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) err = ceph_iterate_session_caps(session, reconnect_caps_cb, &recon_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) session->s_cap_reconnect = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) /* check if all realms can be encoded into current message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) if (mdsc->num_snap_realms) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) size_t total_len =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) recon_state.pagelist->length +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) mdsc->num_snap_realms *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) sizeof(struct ceph_mds_snaprealm_reconnect);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) if (recon_state.msg_version >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) /* number of realms */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) total_len += sizeof(u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) /* version, compat_version and struct_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) total_len += mdsc->num_snap_realms *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) (2 * sizeof(u8) + sizeof(u32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) if (total_len > RECONNECT_MAX_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) if (!recon_state.allow_multi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) err = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) if (recon_state.nr_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) err = send_reconnect_partial(&recon_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) recon_state.msg_version = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) err = encode_snap_realms(mdsc, &recon_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) if (recon_state.msg_version >= 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) err = ceph_pagelist_encode_8(recon_state.pagelist, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) if (recon_state.nr_caps || recon_state.nr_realms) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) struct page *page =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) list_first_entry(&recon_state.pagelist->head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) __le32 *addr = kmap_atomic(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) if (recon_state.nr_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) WARN_ON(recon_state.nr_realms != mdsc->num_snap_realms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) *addr = cpu_to_le32(recon_state.nr_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) } else if (recon_state.msg_version >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) *(addr + 1) = cpu_to_le32(recon_state.nr_realms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) kunmap_atomic(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) reply->hdr.version = cpu_to_le16(recon_state.msg_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) if (recon_state.msg_version >= 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) reply->hdr.compat_version = cpu_to_le16(4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) reply->hdr.data_len = cpu_to_le32(recon_state.pagelist->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) ceph_msg_data_add_pagelist(reply, recon_state.pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) ceph_con_send(&session->s_con, reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) __wake_requests(mdsc, &session->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) ceph_pagelist_release(recon_state.pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) ceph_msg_put(reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) fail_nomsg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) ceph_pagelist_release(recon_state.pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) fail_nopagelist:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) pr_err("error %d preparing reconnect for mds%d\n", err, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106) * compare old and new mdsmaps, kicking requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107) * and closing out old connections as necessary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109) * called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) static void check_new_map(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) struct ceph_mdsmap *newmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) struct ceph_mdsmap *oldmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) int oldstate, newstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) struct ceph_mds_session *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) dout("check_new_map new %u old %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) newmap->m_epoch, oldmap->m_epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) if (!mdsc->sessions[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) s = mdsc->sessions[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) oldstate = ceph_mdsmap_get_state(oldmap, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) newstate = ceph_mdsmap_get_state(newmap, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) i, ceph_mds_state_name(oldstate),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) ceph_mds_state_name(newstate),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) ceph_session_state_name(s->s_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) if (i >= newmap->possible_max_rank) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) /* force close session for stopped mds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) ceph_get_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) __unregister_session(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) __wake_requests(mdsc, &s->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) cleanup_session_requests(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) remove_session_caps(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) kick_requests(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) ceph_mdsmap_get_addr(newmap, i),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) sizeof(struct ceph_entity_addr))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) /* just close it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) ceph_con_close(&s->s_con);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) s->s_state = CEPH_MDS_SESSION_RESTARTING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) } else if (oldstate == newstate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) continue; /* nothing new with this mds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) * send reconnect?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) newstate >= CEPH_MDS_STATE_RECONNECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) send_mds_reconnect(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) * kick request on any mds that has gone active.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) if (oldstate < CEPH_MDS_STATE_ACTIVE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) newstate >= CEPH_MDS_STATE_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) if (oldstate != CEPH_MDS_STATE_CREATING &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) oldstate != CEPH_MDS_STATE_STARTING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) pr_info("mds%d recovery completed\n", s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) kick_requests(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) ceph_kick_flushing_caps(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) wake_up_session_caps(s, RECONNECT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) s = mdsc->sessions[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) if (!ceph_mdsmap_is_laggy(newmap, i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) if (s->s_state == CEPH_MDS_SESSION_OPEN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) s->s_state == CEPH_MDS_SESSION_HUNG ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) s->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) dout(" connecting to export targets of laggy mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) __open_export_target_sessions(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216) * leases
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220) * caller must hold session s_mutex, dentry->d_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) struct ceph_dentry_info *di = ceph_dentry(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) ceph_put_mds_session(di->lease_session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) di->lease_session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) static void handle_lease(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) struct super_block *sb = mdsc->fsc->sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) struct dentry *parent, *dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) struct ceph_dentry_info *di;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) struct ceph_mds_lease *h = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) u32 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) struct ceph_vino vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) struct qstr dname;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) int release = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) dout("handle_lease from mds%d\n", mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) /* decode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) vino.ino = le64_to_cpu(h->ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) vino.snap = CEPH_NOSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) seq = le32_to_cpu(h->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) dname.len = get_unaligned_le32(h + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) dname.name = (void *)(h + 1) + sizeof(u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) /* lookup inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) inode = ceph_find_inode(sb, vino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) dout("handle_lease %s, ino %llx %p %.*s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) ceph_lease_op_name(h->action), vino.ino, inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) dname.len, dname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) inc_session_sequence(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) if (!inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) dout("handle_lease no inode %llx\n", vino.ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) goto release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) /* dentry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) parent = d_find_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) if (!parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) dout("no parent dentry on inode %p\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) goto release; /* hrm... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) dname.hash = full_name_hash(parent, dname.name, dname.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) dentry = d_lookup(parent, &dname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) dput(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) if (!dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) goto release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) di = ceph_dentry(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) switch (h->action) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) case CEPH_MDS_LEASE_REVOKE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) if (di->lease_session == session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) if (ceph_seq_cmp(di->lease_seq, seq) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) h->seq = cpu_to_le32(di->lease_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) __ceph_mdsc_drop_dentry_lease(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) release = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) case CEPH_MDS_LEASE_RENEW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) if (di->lease_session == session &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) di->lease_gen == session->s_cap_gen &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) di->lease_renew_from &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) di->lease_renew_after == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) unsigned long duration =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) msecs_to_jiffies(le32_to_cpu(h->duration_ms));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) di->lease_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) di->time = di->lease_renew_from + duration;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) di->lease_renew_after = di->lease_renew_from +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) (duration >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) di->lease_renew_from = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) if (!release)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) release:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) /* let's just reuse the same message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) h->action = CEPH_MDS_LEASE_REVOKE_ACK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) ceph_msg_get(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) /* avoid calling iput_final() in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) pr_err("corrupt lease message\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) struct dentry *dentry, char action,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) struct ceph_mds_lease *lease;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) struct inode *dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) int len = sizeof(*lease) + sizeof(u32) + NAME_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) dout("lease_send_msg identry %p %s to mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) dentry, ceph_lease_op_name(action), session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) lease = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) lease->action = action;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) lease->seq = cpu_to_le32(seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) dir = d_inode(dentry->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) lease->ino = cpu_to_le64(ceph_ino(dir));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) lease->first = lease->last = cpu_to_le64(ceph_snap(dir));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) put_unaligned_le32(dentry->d_name.len, lease + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) memcpy((void *)(lease + 1) + 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) dentry->d_name.name, dentry->d_name.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) * if this is a preemptive lease RELEASE, no need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) * flush request stream, since the actual request will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) * soon follow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) msg->more_to_follow = (action == CEPH_MDS_LEASE_RELEASE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375) * lock unlock sessions, to wait ongoing session activities
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) for (i = 0; i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) static void maybe_recover_session(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) struct ceph_fs_client *fsc = mdsc->fsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) if (!ceph_test_mount_opt(fsc, CLEANRECOVER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) if (!READ_ONCE(fsc->blocklisted))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) if (fsc->last_auto_reconnect &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) pr_info("auto reconnect after blocklisted\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) fsc->last_auto_reconnect = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) ceph_force_reconnect(fsc->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) bool check_session_state(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) switch (s->s_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) case CEPH_MDS_SESSION_OPEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) s->s_state = CEPH_MDS_SESSION_HUNG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) pr_info("mds%d hung\n", s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) case CEPH_MDS_SESSION_CLOSING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) /* Should never reach this when we're unmounting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) WARN_ON_ONCE(s->s_ttl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) case CEPH_MDS_SESSION_NEW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) case CEPH_MDS_SESSION_RESTARTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) case CEPH_MDS_SESSION_CLOSED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) case CEPH_MDS_SESSION_REJECTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441) * If the sequence is incremented while we're waiting on a REQUEST_CLOSE reply,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442) * then we need to retransmit that request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) void inc_session_sequence(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) lockdep_assert_held(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) s->s_seq++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) dout("resending session close request for mds%d\n", s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) ret = request_close_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) pr_err("unable to close session to mds%d: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) s->s_mds, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462) * delayed work -- periodically trim expired leases, renew caps with mds. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463) * the @delay parameter is set to 0 or if it's more than 5 secs, the default
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464) * workqueue delay value of 5 secs will be used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) static void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) unsigned long max_delay = HZ * 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) /* 5 secs default delay */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) if (!delay || (delay > max_delay))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) delay = max_delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) schedule_delayed_work(&mdsc->delayed_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) round_jiffies_relative(delay));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) static void delayed_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) struct ceph_mds_client *mdsc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) container_of(work, struct ceph_mds_client, delayed_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) unsigned long delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) int renew_interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) int renew_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) dout("mdsc delayed_work\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) if (mdsc->stopping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) renew_caps = time_after_eq(jiffies, HZ*renew_interval +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) mdsc->last_renew_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) if (renew_caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) mdsc->last_renew_caps = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) for (i = 0; i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) if (!check_session_state(s)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) if (renew_caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) send_renew_caps(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) ceph_con_keepalive(&s->s_con);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) if (s->s_state == CEPH_MDS_SESSION_OPEN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) s->s_state == CEPH_MDS_SESSION_HUNG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) ceph_send_cap_releases(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) delay = ceph_check_delayed_caps(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) ceph_queue_cap_reclaim_work(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) ceph_trim_snapid_map(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) maybe_recover_session(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) schedule_delayed(mdsc, delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) int ceph_mdsc_init(struct ceph_fs_client *fsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) struct ceph_mds_client *mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) if (!mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) mdsc->fsc = fsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) mutex_init(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) if (!mdsc->mdsmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) goto err_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) init_completion(&mdsc->safe_umount_waiters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) init_waitqueue_head(&mdsc->session_close_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) INIT_LIST_HEAD(&mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) mdsc->sessions = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) atomic_set(&mdsc->num_sessions, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) mdsc->max_sessions = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) mdsc->stopping = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) atomic64_set(&mdsc->quotarealms_count, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560) mdsc->quotarealms_inodes = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) mutex_init(&mdsc->quotarealms_inodes_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) mdsc->last_snap_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) init_rwsem(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) mdsc->snap_realms = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565) INIT_LIST_HEAD(&mdsc->snap_empty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) mdsc->num_snap_realms = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) spin_lock_init(&mdsc->snap_empty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) mdsc->last_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) mdsc->oldest_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570) mdsc->request_tree = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) mdsc->last_renew_caps = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) INIT_LIST_HEAD(&mdsc->cap_delay_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) INIT_LIST_HEAD(&mdsc->cap_wait_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) spin_lock_init(&mdsc->cap_delay_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) INIT_LIST_HEAD(&mdsc->snap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) spin_lock_init(&mdsc->snap_flush_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) mdsc->last_cap_flush_tid = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) INIT_LIST_HEAD(&mdsc->cap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) mdsc->num_cap_flushing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) spin_lock_init(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) init_waitqueue_head(&mdsc->cap_flushing_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) atomic_set(&mdsc->cap_reclaim_pending, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) err = ceph_metric_init(&mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) goto err_mdsmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) spin_lock_init(&mdsc->dentry_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) INIT_LIST_HEAD(&mdsc->dentry_leases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) INIT_LIST_HEAD(&mdsc->dentry_dir_leases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) ceph_caps_init(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) ceph_adjust_caps_max_min(mdsc, fsc->mount_options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) spin_lock_init(&mdsc->snapid_map_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) mdsc->snapid_map_tree = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) INIT_LIST_HEAD(&mdsc->snapid_map_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) init_rwsem(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) mdsc->pool_perm_tree = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) strscpy(mdsc->nodename, utsname()->nodename,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) sizeof(mdsc->nodename));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) fsc->mdsc = mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) err_mdsmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) kfree(mdsc->mdsmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) err_mdsc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) kfree(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618) * Wait for safe replies on open mds requests. If we time out, drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619) * all requests from the tree to avoid dangling dentry refs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) static void wait_requests(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) struct ceph_options *opts = mdsc->fsc->client->options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) if (__get_oldest_req(mdsc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) dout("wait_requests waiting for requests\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) wait_for_completion_timeout(&mdsc->safe_umount_waiters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) ceph_timeout_jiffies(opts->mount_timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) /* tear down remaining requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636) while ((req = __get_oldest_req(mdsc))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) dout("wait_requests timed out on tid %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) list_del_init(&req->r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) __unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) dout("wait_requests done\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648) * called before mount is ro, and before dentries are torn down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649) * (hmm, does this still race with new lookups?)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) dout("pre_umount\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) mdsc->stopping = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) lock_unlock_sessions(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) ceph_flush_dirty_caps(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) wait_requests(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) * wait for reply handlers to drop their request refs and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) * their inode/dcache refs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) ceph_msgr_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) ceph_cleanup_quotarealms_inodes(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670) * wait for all write mds requests to flush.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) struct ceph_mds_request *req = NULL, *nextreq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) struct rb_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) dout("wait_unsafe_requests want %lld\n", want_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) req = __get_oldest_req(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) while (req && req->r_tid <= want_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) /* find next request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) n = rb_next(&req->r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) if (n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) nextreq = rb_entry(n, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687) nextreq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) if (req->r_op != CEPH_MDS_OP_SETFILELOCK &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) (req->r_op & CEPH_MDS_OP_WRITE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) /* write op */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) ceph_mdsc_get_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) if (nextreq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) ceph_mdsc_get_request(nextreq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) dout("wait_unsafe_requests wait on %llu (want %llu)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) req->r_tid, want_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) wait_for_completion(&req->r_safe_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) if (!nextreq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) break; /* next dne before, so we're done! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) if (RB_EMPTY_NODE(&nextreq->r_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) /* next request was removed from tree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) ceph_mdsc_put_request(nextreq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) ceph_mdsc_put_request(nextreq); /* won't go away */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) req = nextreq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) dout("wait_unsafe_requests done\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) u64 want_tid, want_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) dout("sync\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) want_tid = mdsc->last_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) ceph_flush_dirty_caps(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) want_flush = mdsc->last_cap_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) if (!list_empty(&mdsc->cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) struct ceph_cap_flush *cf =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) list_last_entry(&mdsc->cap_flush_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) struct ceph_cap_flush, g_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) cf->wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) dout("sync want tid %lld flush_seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) want_tid, want_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) wait_unsafe_requests(mdsc, want_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) wait_caps_flush(mdsc, want_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746) * true if all sessions are closed, or we force unmount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) return atomic_read(&mdsc->num_sessions) <= skipped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756) * called after sb is ro.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) struct ceph_options *opts = mdsc->fsc->client->options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) struct ceph_mds_session *session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) int skipped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) dout("close_sessions\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) /* close sessions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) for (i = 0; i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) session = __ceph_lookup_mds_session(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) if (!session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) if (__close_session(mdsc, session) <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) skipped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) dout("waiting for sessions to close\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) wait_event_timeout(mdsc->session_close_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) done_closing_sessions(mdsc, skipped),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) ceph_timeout_jiffies(opts->mount_timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) /* tear down remaining sessions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) for (i = 0; i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) if (mdsc->sessions[i]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) session = ceph_get_mds_session(mdsc->sessions[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) __unregister_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) remove_session_caps(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) WARN_ON(!list_empty(&mdsc->cap_delay_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) ceph_cleanup_snapid_map(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) ceph_cleanup_empty_realms(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) cancel_work_sync(&mdsc->cap_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) dout("stopped\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) struct ceph_mds_session *session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) int mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) dout("force umount\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) for (mds = 0; mds < mdsc->max_sessions; mds++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) session = __ceph_lookup_mds_session(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) if (!session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) if (session->s_state == CEPH_MDS_SESSION_REJECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) __unregister_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) __wake_requests(mdsc, &session->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) __close_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) if (session->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) cleanup_session_requests(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) remove_session_caps(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) kick_requests(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) __wake_requests(mdsc, &mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) dout("stop\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) * Make sure the delayed work stopped before releasing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) * the resources.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) * Because the cancel_delayed_work_sync() will only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) * guarantee that the work finishes executing. But the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) * delayed work will re-arm itself again after that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) flush_delayed_work(&mdsc->delayed_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) if (mdsc->mdsmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) ceph_mdsmap_destroy(mdsc->mdsmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) kfree(mdsc->sessions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) ceph_caps_finalize(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) ceph_pool_perm_destroy(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) struct ceph_mds_client *mdsc = fsc->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) dout("mdsc_destroy %p\n", mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) if (!mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) /* flush out any connection work with references to us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) ceph_msgr_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) ceph_mdsc_stop(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) ceph_metric_destroy(&mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) fsc->mdsc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) kfree(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) dout("mdsc_destroy %p done\n", mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) struct ceph_fs_client *fsc = mdsc->fsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) const char *mds_namespace = fsc->mount_options->mds_namespace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) void *p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) void *end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) u32 epoch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) u32 map_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) u32 num_fs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) u32 mount_fscid = (u32)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) u8 struct_v, struct_cv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) int err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) ceph_decode_need(&p, end, sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) epoch = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) dout("handle_fsmap epoch %u\n", epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) struct_v = ceph_decode_8(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) struct_cv = ceph_decode_8(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) map_len = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) ceph_decode_need(&p, end, sizeof(u32) * 3, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) num_fs = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) while (num_fs-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) void *info_p, *info_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) u32 info_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) u8 info_v, info_cv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) u32 fscid, namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) info_v = ceph_decode_8(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) info_cv = ceph_decode_8(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) info_len = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) ceph_decode_need(&p, end, info_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) info_p = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) info_end = p + info_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) p = info_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) ceph_decode_need(&info_p, info_end, sizeof(u32) * 2, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) fscid = ceph_decode_32(&info_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) namelen = ceph_decode_32(&info_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) ceph_decode_need(&info_p, info_end, namelen, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) if (mds_namespace &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) strlen(mds_namespace) == namelen &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) !strncmp(mds_namespace, (char *)info_p, namelen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) mount_fscid = fscid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) ceph_monc_got_map(&fsc->client->monc, CEPH_SUB_FSMAP, epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) if (mount_fscid != (u32)-1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) fsc->client->monc.fs_cluster_id = mount_fscid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) 0, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) ceph_monc_renew_subs(&fsc->client->monc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) err = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) goto err_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) pr_err("error decoding fsmap\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) err_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) mdsc->mdsmap_err = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) __wake_requests(mdsc, &mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965) * handle mds map update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) u32 epoch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) u32 maplen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) void *p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) void *end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) struct ceph_mdsmap *newmap, *oldmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) struct ceph_fsid fsid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) int err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) ceph_decode_copy(&p, &fsid, sizeof(fsid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) epoch = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) maplen = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) /* do we need it? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) dout("handle_map epoch %u <= our %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) epoch, mdsc->mdsmap->m_epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) newmap = ceph_mdsmap_decode(&p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) if (IS_ERR(newmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) err = PTR_ERR(newmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) goto bad_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) /* swap into place */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) if (mdsc->mdsmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) oldmap = mdsc->mdsmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) mdsc->mdsmap = newmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) check_new_map(mdsc, newmap, oldmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) ceph_mdsmap_destroy(oldmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) mdsc->mdsmap = newmap; /* first mds map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) MAX_LFS_FILESIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) __wake_requests(mdsc, &mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) mdsc->mdsmap->m_epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) schedule_delayed(mdsc, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) bad_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) pr_err("error decoding mdsmap %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) static struct ceph_connection *con_get(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) if (ceph_get_mds_session(s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) return con;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) static void con_put(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044) * if the client is unresponsive for long enough, the mds will kill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045) * the session entirely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) static void peer_reset(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) pr_warn("mds%d closed our session\n", s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) send_mds_reconnect(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055)
/*
 * Route an incoming message from an mds to the appropriate handler.
 * Always consumes the caller's reference on @msg.
 */
static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
{
	struct ceph_mds_session *s = con->private;
	struct ceph_mds_client *mdsc = s->s_mdsc;
	int type = le16_to_cpu(msg->hdr.type);

	/* drop messages for sessions no longer registered with the mdsc
	 * (e.g. closed while this message was in flight) */
	mutex_lock(&mdsc->mutex);
	if (__verify_registered_session(mdsc, s) < 0) {
		mutex_unlock(&mdsc->mutex);
		goto out;
	}
	mutex_unlock(&mdsc->mutex);

	switch (type) {
	case CEPH_MSG_MDS_MAP:
		ceph_mdsc_handle_mdsmap(mdsc, msg);
		break;
	case CEPH_MSG_FS_MAP_USER:
		ceph_mdsc_handle_fsmap(mdsc, msg);
		break;
	case CEPH_MSG_CLIENT_SESSION:
		handle_session(s, msg);
		break;
	case CEPH_MSG_CLIENT_REPLY:
		handle_reply(s, msg);
		break;
	case CEPH_MSG_CLIENT_REQUEST_FORWARD:
		handle_forward(mdsc, s, msg);
		break;
	case CEPH_MSG_CLIENT_CAPS:
		ceph_handle_caps(s, msg);
		break;
	case CEPH_MSG_CLIENT_SNAP:
		ceph_handle_snap(mdsc, s, msg);
		break;
	case CEPH_MSG_CLIENT_LEASE:
		handle_lease(mdsc, s, msg);
		break;
	case CEPH_MSG_CLIENT_QUOTA:
		ceph_handle_quota(mdsc, s, msg);
		break;

	default:
		/* unknown types are logged and dropped, not fatal */
		pr_err("received unknown message type %d %s\n", type,
		       ceph_msg_type_name(type));
	}
out:
	ceph_msg_put(msg);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107) * authentication
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111) * Note: returned pointer is the address of a structure that's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112) * managed separately. Caller must *not* attempt to free it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) int *proto, int force_new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) struct ceph_auth_handshake *auth = &s->s_auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) if (force_new && auth->authorizer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) ceph_auth_destroy_authorizer(auth->authorizer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) auth->authorizer = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) if (!auth->authorizer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) auth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) auth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) *proto = ac->protocol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) return auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) static int add_authorizer_challenge(struct ceph_connection *con,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) void *challenge_buf, int challenge_buf_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) challenge_buf, challenge_buf_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) static int verify_authorizer_reply(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) static int invalidate_authorizer(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) struct ceph_msg_header *hdr, int *skip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) int type = (int) le16_to_cpu(hdr->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) int front_len = (int) le32_to_cpu(hdr->front_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) if (con->in_msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) return con->in_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) *skip = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) pr_err("unable to allocate msg type %d len %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) type, front_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) return msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) static int mds_sign_message(struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196) struct ceph_mds_session *s = msg->con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197) struct ceph_auth_handshake *auth = &s->s_auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199) return ceph_auth_sign_message(auth, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) static int mds_check_message_signature(struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204) struct ceph_mds_session *s = msg->con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205) struct ceph_auth_handshake *auth = &s->s_auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207) return ceph_auth_check_message_signature(auth, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209)
/*
 * Connection operations used by the messenger for mds sessions.
 * con->private is the struct ceph_mds_session for each connection.
 */
static const struct ceph_connection_operations mds_con_ops = {
	.get = con_get,
	.put = con_put,
	.dispatch = dispatch,
	.get_authorizer = get_authorizer,
	.add_authorizer_challenge = add_authorizer_challenge,
	.verify_authorizer_reply = verify_authorizer_reply,
	.invalidate_authorizer = invalidate_authorizer,
	.peer_reset = peer_reset,
	.alloc_msg = mds_alloc_msg,
	.sign_message = mds_sign_message,
	.check_message_signature = mds_check_message_signature,
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) /* eof */