Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) #include <linux/ceph/ceph_debug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4) #include <linux/fs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5) #include <linux/wait.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7) #include <linux/gfp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9) #include <linux/debugfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10) #include <linux/seq_file.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) #include <linux/ratelimit.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #include <linux/bits.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) #include <linux/ktime.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) #include "super.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) #include "mds_client.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) #include <linux/ceph/ceph_features.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) #include <linux/ceph/messenger.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) #include <linux/ceph/decode.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) #include <linux/ceph/pagelist.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) #include <linux/ceph/auth.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) #include <linux/ceph/debugfs.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) #define RECONNECT_MAX_SIZE (INT_MAX - PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28)  * A cluster of MDS (metadata server) daemons is responsible for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29)  * managing the file system namespace (the directory hierarchy and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30)  * inodes) and for coordinating shared access to storage.  Metadata is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31)  * partitioned hierarchically across a number of servers, and that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32)  * partition varies over time as the cluster adjusts the distribution
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33)  * in order to balance load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35)  * The MDS client is primarily responsible for managing synchronous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36)  * metadata requests for operations like open, unlink, and so forth.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37)  * If there is a MDS failure, we find out about it when we (possibly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38)  * request and) receive a new MDS map, and can resubmit affected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39)  * requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41)  * For the most part, though, we take advantage of a lossless
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42)  * communications channel to the MDS, and do not need to worry about
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43)  * timing out or resubmitting requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45)  * We maintain a stateful "session" with each MDS we interact with.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46)  * Within each session, we send periodic heartbeat messages to ensure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47)  * any capabilities or leases we have been issued remain valid.  If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48)  * the session times out and goes stale, our leases and capabilities
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49)  * are no longer valid.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) 
/*
 * State threaded through construction of an MDS "reconnect" message
 * (whose total size is bounded by RECONNECT_MAX_SIZE above).
 * NOTE(review): field roles are inferred from their names; confirm
 * against the reconnect encoders later in this file.
 */
struct ceph_reconnect_state {
	struct ceph_mds_session *session;
	int nr_caps, nr_realms;		/* counts of items encoded so far */
	struct ceph_pagelist *pagelist;	/* message payload being built */
	unsigned msg_version;		/* reconnect encoding version */
	bool allow_multi;		/* may span multiple messages? */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59) 
/* Forward declarations for handlers defined later in this file. */
static void __wake_requests(struct ceph_mds_client *mdsc,
			    struct list_head *head);
static void ceph_cap_release_work(struct work_struct *work);
static void ceph_cap_reclaim_work(struct work_struct *work);

/* Messenger callbacks for MDS connections; defined at the end of the file. */
static const struct ceph_connection_operations mds_con_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69)  * mds reply parsing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) static int parse_reply_info_quota(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) 				  struct ceph_mds_reply_info_in *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) 	u8 struct_v, struct_compat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) 	u32 struct_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 	ceph_decode_8_safe(p, end, struct_v, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) 	ceph_decode_8_safe(p, end, struct_compat, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) 	/* struct_v is expected to be >= 1. we only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 	 * understand encoding with struct_compat == 1. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 	if (!struct_v || struct_compat != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 	ceph_decode_32_safe(p, end, struct_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) 	ceph_decode_need(p, end, struct_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 	end = *p + struct_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) 	ceph_decode_64_safe(p, end, info->max_bytes, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) 	ceph_decode_64_safe(p, end, info->max_files, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) 	*p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 	return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 
/*
 * parse individual inode info
 *
 * When features == (u64)-1 the record uses the self-describing
 * versioned encoding (version/compat bytes plus an explicit length),
 * so unknown trailing fields can be skipped; otherwise each optional
 * field is gated on a specific feature bit.  All info->* data
 * pointers are set to point directly into the message buffer (no
 * copies).  Returns 0, -EIO on a short/garbled buffer, or an error
 * from parse_reply_info_quota().
 */
static int parse_reply_info_in(void **p, void *end,
			       struct ceph_mds_reply_info_in *info,
			       u64 features)
{
	int err = 0;
	u8 struct_v = 0;

	if (features == (u64)-1) {
		u32 struct_len;
		u8 struct_compat;
		ceph_decode_8_safe(p, end, struct_v, bad);
		ceph_decode_8_safe(p, end, struct_compat, bad);
		/* struct_v is expected to be >= 1. we only understand
		 * encoding with struct_compat == 1. */
		if (!struct_v || struct_compat != 1)
			goto bad;
		ceph_decode_32_safe(p, end, struct_len, bad);
		ceph_decode_need(p, end, struct_len, bad);
		/* restrict decoding to the advertised record length */
		end = *p + struct_len;
	}

	/* fixed-size inode body plus its variable-length fragtree splits */
	ceph_decode_need(p, end, sizeof(struct ceph_mds_reply_inode), bad);
	info->in = *p;
	*p += sizeof(struct ceph_mds_reply_inode) +
		sizeof(*info->in->fragtree.splits) *
		le32_to_cpu(info->in->fragtree.nsplits);

	/* length-prefixed symlink target */
	ceph_decode_32_safe(p, end, info->symlink_len, bad);
	ceph_decode_need(p, end, info->symlink_len, bad);
	info->symlink = *p;
	*p += info->symlink_len;

	ceph_decode_copy_safe(p, end, &info->dir_layout,
			      sizeof(info->dir_layout), bad);
	/* length-prefixed xattr blob */
	ceph_decode_32_safe(p, end, info->xattr_len, bad);
	ceph_decode_need(p, end, info->xattr_len, bad);
	info->xattr_data = *p;
	*p += info->xattr_len;

	if (features == (u64)-1) {
		/* inline data */
		ceph_decode_64_safe(p, end, info->inline_version, bad);
		ceph_decode_32_safe(p, end, info->inline_len, bad);
		ceph_decode_need(p, end, info->inline_len, bad);
		info->inline_data = *p;
		*p += info->inline_len;
		/* quota */
		err = parse_reply_info_quota(p, end, info);
		if (err < 0)
			goto out_bad;
		/* pool namespace */
		ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
		if (info->pool_ns_len > 0) {
			ceph_decode_need(p, end, info->pool_ns_len, bad);
			info->pool_ns_data = *p;
			*p += info->pool_ns_len;
		}

		/* btime */
		ceph_decode_need(p, end, sizeof(info->btime), bad);
		ceph_decode_copy(p, &info->btime, sizeof(info->btime));

		/* change attribute */
		ceph_decode_64_safe(p, end, info->change_attr, bad);

		/* dir pin (only present from encoding version 2 on) */
		if (struct_v >= 2) {
			ceph_decode_32_safe(p, end, info->dir_pin, bad);
		} else {
			info->dir_pin = -ENODATA;
		}

		/* snapshot birth time, remains zero for v<=2 */
		if (struct_v >= 3) {
			ceph_decode_need(p, end, sizeof(info->snap_btime), bad);
			ceph_decode_copy(p, &info->snap_btime,
					 sizeof(info->snap_btime));
		} else {
			memset(&info->snap_btime, 0, sizeof(info->snap_btime));
		}

		/* skip any fields newer than we understand */
		*p = end;
	} else {
		/* legacy encoding: each section gated by a feature bit */
		if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
			ceph_decode_64_safe(p, end, info->inline_version, bad);
			ceph_decode_32_safe(p, end, info->inline_len, bad);
			ceph_decode_need(p, end, info->inline_len, bad);
			info->inline_data = *p;
			*p += info->inline_len;
		} else
			info->inline_version = CEPH_INLINE_NONE;

		if (features & CEPH_FEATURE_MDS_QUOTA) {
			err = parse_reply_info_quota(p, end, info);
			if (err < 0)
				goto out_bad;
		} else {
			info->max_bytes = 0;
			info->max_files = 0;
		}

		info->pool_ns_len = 0;
		info->pool_ns_data = NULL;
		if (features & CEPH_FEATURE_FS_FILE_LAYOUT_V2) {
			ceph_decode_32_safe(p, end, info->pool_ns_len, bad);
			if (info->pool_ns_len > 0) {
				ceph_decode_need(p, end, info->pool_ns_len, bad);
				info->pool_ns_data = *p;
				*p += info->pool_ns_len;
			}
		}

		if (features & CEPH_FEATURE_FS_BTIME) {
			ceph_decode_need(p, end, sizeof(info->btime), bad);
			ceph_decode_copy(p, &info->btime, sizeof(info->btime));
			ceph_decode_64_safe(p, end, info->change_attr, bad);
		}

		/* fields the legacy encoding never carries */
		info->dir_pin = -ENODATA;
		/* info->snap_btime remains zero */
	}
	return 0;
bad:
	err = -EIO;
out_bad:
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 
/*
 * Parse a ceph_mds_reply_dirfrag record.  *dirfrag is left pointing
 * into the message buffer and *p is advanced past the fixed struct
 * plus the ndist trailing 32-bit entries that follow it.
 * Returns 0 or -EIO.
 */
static int parse_reply_info_dir(void **p, void *end,
				struct ceph_mds_reply_dirfrag **dirfrag,
				u64 features)
{
	if (features == (u64)-1) {
		/* versioned encoding: (version, compat, length) header */
		u8 struct_v, struct_compat;
		u32 struct_len;
		ceph_decode_8_safe(p, end, struct_v, bad);
		ceph_decode_8_safe(p, end, struct_compat, bad);
		/* struct_v is expected to be >= 1. we only understand
		 * encoding whose struct_compat == 1. */
		if (!struct_v || struct_compat != 1)
			goto bad;
		ceph_decode_32_safe(p, end, struct_len, bad);
		ceph_decode_need(p, end, struct_len, bad);
		end = *p + struct_len;
	}

	ceph_decode_need(p, end, sizeof(**dirfrag), bad);
	*dirfrag = *p;
	*p += sizeof(**dirfrag) + sizeof(u32) * le32_to_cpu((*dirfrag)->ndist);
	/* the ndist array length comes from the wire; re-check the bound */
	if (unlikely(*p > end))
		goto bad;
	if (features == (u64)-1)
		*p = end;	/* skip fields newer than we understand */
	return 0;
bad:
	return -EIO;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) static int parse_reply_info_lease(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 				  struct ceph_mds_reply_lease **lease,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 				  u64 features)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 	if (features == (u64)-1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 		u8 struct_v, struct_compat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 		u32 struct_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 		ceph_decode_8_safe(p, end, struct_v, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 		ceph_decode_8_safe(p, end, struct_compat, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 		/* struct_v is expected to be >= 1. we only understand
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 		 * encoding whose struct_compat == 1. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 		if (!struct_v || struct_compat != 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 		ceph_decode_32_safe(p, end, struct_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 		ceph_decode_need(p, end, struct_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 		end = *p + struct_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 	ceph_decode_need(p, end, sizeof(**lease), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 	*lease = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 	*p += sizeof(**lease);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 	if (features == (u64)-1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 		*p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 
/*
 * parse a normal reply, which may contain a (dir+)dentry and/or a
 * target inode.
 *
 * The reply head's is_dentry/is_target flags say which sections are
 * present.  Any bytes left over after the parsed sections make the
 * whole trace invalid (-EIO).
 */
static int parse_reply_info_trace(void **p, void *end,
				  struct ceph_mds_reply_info_parsed *info,
				  u64 features)
{
	int err;

	if (info->head->is_dentry) {
		/* parent dir inode, dirfrag, dentry name, dentry lease */
		err = parse_reply_info_in(p, end, &info->diri, features);
		if (err < 0)
			goto out_bad;

		err = parse_reply_info_dir(p, end, &info->dirfrag, features);
		if (err < 0)
			goto out_bad;

		ceph_decode_32_safe(p, end, info->dname_len, bad);
		ceph_decode_need(p, end, info->dname_len, bad);
		info->dname = *p;
		*p += info->dname_len;

		err = parse_reply_info_lease(p, end, &info->dlease, features);
		if (err < 0)
			goto out_bad;
	}

	if (info->head->is_target) {
		/* target inode of the operation */
		err = parse_reply_info_in(p, end, &info->targeti, features);
		if (err < 0)
			goto out_bad;
	}

	/* the trace must consume its section exactly */
	if (unlikely(*p != end))
		goto bad;
	return 0;

bad:
	err = -EIO;
out_bad:
	pr_err("problem parsing mds trace %d\n", err);
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331)  * parse readdir results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) static int parse_reply_info_readdir(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) 				struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) 				u64 features)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	u32 num, i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	err = parse_reply_info_dir(p, end, &info->dir_dir, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 		goto out_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 	ceph_decode_need(p, end, sizeof(num) + 2, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 	num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 		u16 flags = ceph_decode_16(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 		info->dir_end = !!(flags & CEPH_READDIR_FRAG_END);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 		info->dir_complete = !!(flags & CEPH_READDIR_FRAG_COMPLETE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 		info->hash_order = !!(flags & CEPH_READDIR_HASH_ORDER);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 		info->offset_hash = !!(flags & CEPH_READDIR_OFFSET_HASH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 	if (num == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 		goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 	BUG_ON(!info->dir_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 	if ((unsigned long)(info->dir_entries + num) >
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 	    (unsigned long)info->dir_entries + info->dir_buf_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 		pr_err("dir contents are larger than expected\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 		WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 	info->dir_nr = num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 	while (num) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 		struct ceph_mds_reply_dir_entry *rde = info->dir_entries + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 		/* dentry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 		ceph_decode_32_safe(p, end, rde->name_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 		ceph_decode_need(p, end, rde->name_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 		rde->name = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 		*p += rde->name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 		dout("parsed dir dname '%.*s'\n", rde->name_len, rde->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 		/* dentry lease */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 		err = parse_reply_info_lease(p, end, &rde->lease, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 			goto out_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 		/* inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 		err = parse_reply_info_in(p, end, &rde->inode, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 		if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 			goto out_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 		/* ceph_readdir_prepopulate() will update it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 		rde->offset = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 		i++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 		num--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 	/* Skip over any unrecognized fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	*p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 	err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) out_bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 	pr_err("problem parsing dir contents %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401)  * parse fcntl F_GETLK results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) static int parse_reply_info_filelock(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 				     struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 				     u64 features)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 	if (*p + sizeof(*info->filelock_reply) > end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 	info->filelock_reply = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 	/* Skip over any unrecognized fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 	*p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 	return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) #if BITS_PER_LONG == 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) #define DELEGATED_INO_AVAILABLE		xa_mk_value(1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 
/*
 * Record the inode-number ranges the MDS has delegated to this
 * session.  Each ino in each (start, len) range is inserted into
 * s->s_delegated_inos with value DELEGATED_INO_AVAILABLE.
 *
 * Returns 0, -EIO on a truncated buffer, or a negative xa_insert()
 * error such as -ENOMEM.  Note: entries inserted before a failure are
 * not rolled back here.
 */
static int ceph_parse_deleg_inos(void **p, void *end,
				 struct ceph_mds_session *s)
{
	u32 sets;

	ceph_decode_32_safe(p, end, sets, bad);
	dout("got %u sets of delegated inodes\n", sets);
	while (sets--) {
		u64 start, len, ino;

		ceph_decode_64_safe(p, end, start, bad);
		ceph_decode_64_safe(p, end, len, bad);

		/* Don't accept a delegation of system inodes */
		if (start < CEPH_INO_SYSTEM_BASE) {
			pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n",
					start, len);
			continue;
		}
		while (len--) {
			/* ino records the value used; start advances */
			int err = xa_insert(&s->s_delegated_inos, ino = start++,
					    DELEGATED_INO_AVAILABLE,
					    GFP_KERNEL);
			if (!err) {
				dout("added delegated inode 0x%llx\n",
				     start - 1);
			} else if (err == -EBUSY) {
				/* duplicate delegation: warn but keep going */
				pr_warn("ceph: MDS delegated inode 0x%llx more than once.\n",
					start - 1);
			} else {
				return err;
			}
		}
	}
	return 0;
bad:
	return -EIO;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) u64 ceph_get_deleg_ino(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 	unsigned long ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 	void *val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 	xa_for_each(&s->s_delegated_inos, ino, val) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 		val = xa_erase(&s->s_delegated_inos, ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 		if (val == DELEGATED_INO_AVAILABLE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 			return ino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) int ceph_restore_deleg_ino(struct ceph_mds_session *s, u64 ino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 	return xa_insert(&s->s_delegated_inos, ino, DELEGATED_INO_AVAILABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 			 GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) #else /* BITS_PER_LONG == 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483)  * FIXME: xarrays can't handle 64-bit indexes on a 32-bit arch. For now, just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484)  * ignore delegated_inos on 32 bit arch. Maybe eventually add xarrays for top
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485)  * and bottom words?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) static int ceph_parse_deleg_inos(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 				 struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 	u32 sets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 	ceph_decode_32_safe(p, end, sets, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 	if (sets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 		ceph_decode_skip_n(p, end, sets * 2 * sizeof(__le64), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 	return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) u64 ceph_get_deleg_ino(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) int ceph_restore_deleg_ino(struct ceph_mds_session *s, u64 ino)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) #endif /* BITS_PER_LONG == 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512)  * parse create results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) static int parse_reply_info_create(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 				  struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 				  u64 features, struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	if (features == (u64)-1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 	    (features & CEPH_FEATURE_REPLY_CREATE_INODE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 		if (*p == end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 			/* Malformed reply? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 			info->has_create_ino = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 		} else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 			u8 struct_v, struct_compat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 			u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 			info->has_create_ino = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 			ceph_decode_8_safe(p, end, struct_v, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 			ceph_decode_8_safe(p, end, struct_compat, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 			ceph_decode_32_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 			ceph_decode_64_safe(p, end, info->ino, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 			ret = ceph_parse_deleg_inos(p, end, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 			if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 				return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 			/* legacy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 			ceph_decode_64_safe(p, end, info->ino, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 			info->has_create_ino = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 		if (*p != end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 	/* Skip over any unrecognized fields */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 	*p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 	return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555)  * parse extra results
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) static int parse_reply_info_extra(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 				  struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 				  u64 features, struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 	u32 op = le32_to_cpu(info->head->op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 	if (op == CEPH_MDS_OP_GETFILELOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 		return parse_reply_info_filelock(p, end, info, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 	else if (op == CEPH_MDS_OP_READDIR || op == CEPH_MDS_OP_LSSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 		return parse_reply_info_readdir(p, end, info, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 	else if (op == CEPH_MDS_OP_CREATE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) 		return parse_reply_info_create(p, end, info, features, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 		return -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574)  * parse entire mds reply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) static int parse_reply_info(struct ceph_mds_session *s, struct ceph_msg *msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 			    struct ceph_mds_reply_info_parsed *info,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 			    u64 features)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 	void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 	u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 	info->head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 	p = msg->front.iov_base + sizeof(struct ceph_mds_reply_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 	end = p + msg->front.iov_len - sizeof(struct ceph_mds_reply_head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 	/* trace */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 	ceph_decode_32_safe(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 	if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 		ceph_decode_need(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 		err = parse_reply_info_trace(&p, p+len, info, features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 		if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 			goto out_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 	/* extra */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) 	ceph_decode_32_safe(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 	if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 		ceph_decode_need(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 		err = parse_reply_info_extra(&p, p+len, info, features, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 		if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 			goto out_bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	/* snap blob */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 	ceph_decode_32_safe(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) 	info->snapblob_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 	info->snapblob = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 	p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	if (p != end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 	err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) out_bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 	pr_err("mds parse_reply err %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) static void destroy_reply_info(struct ceph_mds_reply_info_parsed *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	if (!info->dir_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 	free_pages((unsigned long)info->dir_entries, get_order(info->dir_buf_size));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632)  * sessions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) const char *ceph_session_state_name(int s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	switch (s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 	case CEPH_MDS_SESSION_NEW: return "new";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	case CEPH_MDS_SESSION_OPENING: return "opening";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 	case CEPH_MDS_SESSION_OPEN: return "open";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 	case CEPH_MDS_SESSION_HUNG: return "hung";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	case CEPH_MDS_SESSION_CLOSING: return "closing";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 	case CEPH_MDS_SESSION_CLOSED: return "closed";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 	case CEPH_MDS_SESSION_RESTARTING: return "restarting";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 	case CEPH_MDS_SESSION_RECONNECTING: return "reconnecting";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 	case CEPH_MDS_SESSION_REJECTED: return "rejected";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	default: return "???";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) struct ceph_mds_session *ceph_get_mds_session(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	if (refcount_inc_not_zero(&s->s_ref)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 		dout("mdsc get_session %p %d -> %d\n", s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 		     refcount_read(&s->s_ref)-1, refcount_read(&s->s_ref));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 		return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 		dout("mdsc get_session %p 0 -- FAIL\n", s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) void ceph_put_mds_session(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 	if (IS_ERR_OR_NULL(s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 	dout("mdsc put_session %p %d -> %d\n", s,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 	     refcount_read(&s->s_ref), refcount_read(&s->s_ref)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 	if (refcount_dec_and_test(&s->s_ref)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 		if (s->s_auth.authorizer)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 			ceph_auth_destroy_authorizer(s->s_auth.authorizer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 		WARN_ON(mutex_is_locked(&s->s_mutex));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 		xa_destroy(&s->s_delegated_inos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 		kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679)  * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) struct ceph_mds_session *__ceph_lookup_mds_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 						   int mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 	if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	return ceph_get_mds_session(mdsc->sessions[mds]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) static bool __have_session(struct ceph_mds_client *mdsc, int mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 	if (mds >= mdsc->max_sessions || !mdsc->sessions[mds])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) static int __verify_registered_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 				       struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 	if (s->s_mds >= mdsc->max_sessions ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 	    mdsc->sessions[s->s_mds] != s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 		return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707)  * create+register a new session for given mds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708)  * called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) static struct ceph_mds_session *register_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 						 int mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 	struct ceph_mds_session *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 	if (mds >= mdsc->mdsmap->possible_max_rank)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	s = kzalloc(sizeof(*s), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 	if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 	if (mds >= mdsc->max_sessions) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 		int newmax = 1 << get_count_order(mds + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 		struct ceph_mds_session **sa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 		dout("%s: realloc to %d\n", __func__, newmax);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 		sa = kcalloc(newmax, sizeof(void *), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 		if (!sa)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 			goto fail_realloc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 		if (mdsc->sessions) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 			memcpy(sa, mdsc->sessions,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 			       mdsc->max_sessions * sizeof(void *));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 			kfree(mdsc->sessions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 		mdsc->sessions = sa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 		mdsc->max_sessions = newmax;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 	dout("%s: mds%d\n", __func__, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	s->s_mdsc = mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	s->s_mds = mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 	s->s_state = CEPH_MDS_SESSION_NEW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 	s->s_ttl = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	s->s_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	mutex_init(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 	spin_lock_init(&s->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	s->s_cap_gen = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	s->s_cap_ttl = jiffies - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	spin_lock_init(&s->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	s->s_renew_requested = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	s->s_renew_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	INIT_LIST_HEAD(&s->s_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) 	s->s_nr_caps = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	refcount_set(&s->s_ref, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 	INIT_LIST_HEAD(&s->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	INIT_LIST_HEAD(&s->s_unsafe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	xa_init(&s->s_delegated_inos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	s->s_num_cap_releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	s->s_cap_reconnect = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 	s->s_cap_iterator = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	INIT_LIST_HEAD(&s->s_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	INIT_WORK(&s->s_cap_release_work, ceph_cap_release_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	INIT_LIST_HEAD(&s->s_cap_dirty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	INIT_LIST_HEAD(&s->s_cap_flushing);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 	mdsc->sessions[mds] = s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 	atomic_inc(&mdsc->num_sessions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	refcount_inc(&s->s_ref);  /* one ref to sessions[], one to caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	ceph_con_open(&s->s_con, CEPH_ENTITY_TYPE_MDS, mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 		      ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) fail_realloc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 	return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786)  * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) static void __unregister_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 			       struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	dout("__unregister_session mds%d %p\n", s->s_mds, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	BUG_ON(mdsc->sessions[s->s_mds] != s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 	mdsc->sessions[s->s_mds] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	ceph_con_close(&s->s_con);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 	ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	atomic_dec(&mdsc->num_sessions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800)  * drop session refs in request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802)  * should be last request ref, or hold mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) static void put_request_session(struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	if (req->r_session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		ceph_put_mds_session(req->r_session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 		req->r_session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) void ceph_mdsc_release_request(struct kref *kref)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	struct ceph_mds_request *req = container_of(kref,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 						    struct ceph_mds_request,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 						    r_kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	ceph_mdsc_release_dir_caps_no_check(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	destroy_reply_info(&req->r_reply_info);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	if (req->r_request)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 		ceph_msg_put(req->r_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	if (req->r_reply)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		ceph_msg_put(req->r_reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	if (req->r_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 		ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 		/* avoid calling iput_final() in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 		ceph_async_iput(req->r_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 	if (req->r_parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 		ceph_put_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		ceph_async_iput(req->r_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 	ceph_async_iput(req->r_target_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 	if (req->r_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 		dput(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 	if (req->r_old_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		dput(req->r_old_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	if (req->r_old_dentry_dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 		 * track (and drop pins for) r_old_dentry_dir
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 		 * separately, since r_old_dentry's d_parent may have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 		 * changed between the dir mutex being dropped and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		 * this request being freed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 		ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 				  CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		ceph_async_iput(req->r_old_dentry_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	kfree(req->r_path1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	kfree(req->r_path2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 	if (req->r_pagelist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 		ceph_pagelist_release(req->r_pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 	put_request_session(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	ceph_unreserve_caps(req->r_mdsc, &req->r_caps_reservation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	WARN_ON_ONCE(!list_empty(&req->r_wait));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	kmem_cache_free(ceph_mds_request_cachep, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) DEFINE_RB_FUNCS(request, struct ceph_mds_request, r_tid, r_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861)  * lookup session, bump ref if found.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863)  * called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) static struct ceph_mds_request *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) lookup_get_request(struct ceph_mds_client *mdsc, u64 tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 	req = lookup_request(&mdsc->request_tree, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	if (req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		ceph_mdsc_get_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 	return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878)  * Register an in-flight request, and assign a tid.  Link to directory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879)  * are modifying (if any).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881)  * Called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) static void __register_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 			       struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 			       struct inode *dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	int ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	req->r_tid = ++mdsc->last_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	if (req->r_num_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		ret = ceph_reserve_caps(mdsc, &req->r_caps_reservation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 					req->r_num_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 		if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 			pr_err("__register_request %p "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 			       "failed to reserve caps: %d\n", req, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 			/* set req->r_err to fail early from __do_request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 			req->r_err = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	dout("__register_request %p tid %lld\n", req, req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	ceph_mdsc_get_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	insert_request(&mdsc->request_tree, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	req->r_uid = current_fsuid();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	req->r_gid = current_fsgid();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 		mdsc->oldest_tid = req->r_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	if (dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		struct ceph_inode_info *ci = ceph_inode(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 		ihold(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 		req->r_unsafe_dir = dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 		spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		list_add_tail(&req->r_unsafe_dir_item, &ci->i_unsafe_dirops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 		spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) static void __unregister_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 				 struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 	dout("__unregister_request %p tid %lld\n", req, req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	/* Never leave an unregistered request on an unsafe list! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	list_del_init(&req->r_unsafe_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	if (req->r_tid == mdsc->oldest_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 		struct rb_node *p = rb_next(&req->r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 		mdsc->oldest_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 		while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 			struct ceph_mds_request *next_req =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 				rb_entry(p, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 			if (next_req->r_op != CEPH_MDS_OP_SETFILELOCK) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 				mdsc->oldest_tid = next_req->r_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 			p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	erase_request(&mdsc->request_tree, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	if (req->r_unsafe_dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 		struct ceph_inode_info *ci = ceph_inode(req->r_unsafe_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 		spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 		list_del_init(&req->r_unsafe_dir_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 		spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	if (req->r_target_inode &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	    test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		struct ceph_inode_info *ci = ceph_inode(req->r_target_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 		spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 		list_del_init(&req->r_unsafe_target_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 		spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	if (req->r_unsafe_dir) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 		/* avoid calling iput_final() in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		ceph_async_iput(req->r_unsafe_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 		req->r_unsafe_dir = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	complete_all(&req->r_safe_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972)  * Walk back up the dentry tree until we hit a dentry representing a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973)  * non-snapshot inode. We do this using the rcu_read_lock (which must be held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974)  * when calling this) to ensure that the objects won't disappear while we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975)  * working with them. Once we hit a candidate dentry, we attempt to take a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976)  * reference to it, and return that as the result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) static struct inode *get_nonsnap_parent(struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	struct inode *inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	while (dentry && !IS_ROOT(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 		inode = d_inode_rcu(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 		if (!inode || ceph_snap(inode) == CEPH_NOSNAP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		dentry = dentry->d_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	if (inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 		inode = igrab(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 	return inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994)  * Choose mds to send request to next.  If there is a hint set in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995)  * request (e.g., due to a prior forward hint from the mds), use that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996)  * Otherwise, consult frag tree and/or caps to identify the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997)  * appropriate mds.  If all else fails, choose randomly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999)  * Called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) static int __choose_mds(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 			struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 			bool *random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	struct ceph_inode_info *ci;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	int mode = req->r_direct_mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 	int mds = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	u32 hash = req->r_direct_hash;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	bool is_hash = test_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	if (random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 		*random = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	 * is there a specific mds we should try?  ignore hint if we have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	 * no session and the mds is not up (active or recovering).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	if (req->r_resend_mds >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	    (__have_session(mdsc, req->r_resend_mds) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	     ceph_mdsmap_get_state(mdsc->mdsmap, req->r_resend_mds) > 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 		dout("%s using resend_mds mds%d\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 		     req->r_resend_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 		return req->r_resend_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	if (mode == USE_RANDOM_MDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 		goto random;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	if (req->r_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 		if (ceph_snap(req->r_inode) != CEPH_SNAPDIR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 			inode = req->r_inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 			ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 			/* req->r_dentry is non-null for LSSNAP request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 			rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 			inode = get_nonsnap_parent(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 			rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 			dout("%s using snapdir's parent %p\n", __func__, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	} else if (req->r_dentry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 		/* ignore race with rename; old or new d_parent is okay */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 		struct dentry *parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 		struct inode *dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 		rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		parent = READ_ONCE(req->r_dentry->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 		dir = req->r_parent ? : d_inode_rcu(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 		if (!dir || dir->i_sb != mdsc->fsc->sb) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 			/*  not this fs or parent went negative */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 			inode = d_inode(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 			if (inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 				ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		} else if (ceph_snap(dir) != CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 			/* direct snapped/virtual snapdir requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 			 * based on parent dir inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 			inode = get_nonsnap_parent(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 			dout("%s using nonsnap parent %p\n", __func__, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 			/* dentry target */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 			inode = d_inode(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 			if (!inode || mode == USE_AUTH_MDS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 				/* dir + name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 				inode = igrab(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 				hash = ceph_dentry_hash(dir, req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 				is_hash = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 				ihold(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	dout("%s %p is_hash=%d (0x%x) mode %d\n", __func__, inode, (int)is_hash,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	     hash, mode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	if (!inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		goto random;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	if (is_hash && S_ISDIR(inode->i_mode)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 		struct ceph_inode_frag frag;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 		int found;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 		ceph_choose_frag(ci, hash, &frag, &found);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 		if (found) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 			if (mode == USE_ANY_MDS && frag.ndist > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 				u8 r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 				/* choose a random replica */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 				get_random_bytes(&r, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 				r %= frag.ndist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 				mds = frag.dist[r];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 				dout("%s %p %llx.%llx frag %u mds%d (%d/%d)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 				     __func__, inode, ceph_vinop(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 				     frag.frag, mds, (int)r, frag.ndist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 				    CEPH_MDS_STATE_ACTIVE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 				    !ceph_mdsmap_is_laggy(mdsc->mdsmap, mds))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 					goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 			/* since this file/dir wasn't known to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 			 * replicated, then we want to look for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 			 * authoritative mds. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 			if (frag.mds >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 				/* choose auth mds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 				mds = frag.mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 				dout("%s %p %llx.%llx frag %u mds%d (auth)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 				     __func__, inode, ceph_vinop(inode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 				     frag.frag, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 				if (ceph_mdsmap_get_state(mdsc->mdsmap, mds) >=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 				    CEPH_MDS_STATE_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 					if (!ceph_mdsmap_is_laggy(mdsc->mdsmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 								  mds))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 						goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 			mode = USE_AUTH_MDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	cap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 	if (mode == USE_AUTH_MDS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 		cap = ci->i_auth_cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	if (!cap && !RB_EMPTY_ROOT(&ci->i_caps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 		cap = rb_entry(rb_first(&ci->i_caps), struct ceph_cap, ci_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	if (!cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 		spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 		ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 		goto random;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	mds = cap->session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	dout("%s %p %llx.%llx mds%d (%scap %p)\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	     inode, ceph_vinop(inode), mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	     cap == ci->i_auth_cap ? "auth " : "", cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	/* avoid calling iput_final() while holding mdsc->mutex or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	 * in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	return mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) random:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	if (random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 		*random = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	mds = ceph_mdsmap_get_random_mds(mdsc->mdsmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	dout("%s chose random mds%d\n", __func__, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 	return mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)  * session messages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) static struct ceph_msg *create_session_msg(u32 op, u64 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	struct ceph_mds_session_head *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h), GFP_NOFS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 			   false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 		pr_err("create_session_msg ENOMEM creating msg\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	h = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	h->op = cpu_to_le32(op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	h->seq = cpu_to_le64(seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	return msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) static const unsigned char feature_bits[] = CEPHFS_FEATURES_CLIENT_SUPPORTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) #define FEATURE_BYTES(c) (DIV_ROUND_UP((size_t)feature_bits[c - 1] + 1, 64) * 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) static int encode_supported_features(void **p, void *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	static const size_t count = ARRAY_SIZE(feature_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	if (count > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 		size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 		size_t size = FEATURE_BYTES(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 		if (WARN_ON_ONCE(*p + 4 + size > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 			return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		ceph_encode_32(p, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 		memset(*p, 0, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 		for (i = 0; i < count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 			((unsigned char*)(*p))[i / 8] |= BIT(feature_bits[i] % 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 		*p += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 		if (WARN_ON_ONCE(*p + 4 > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 			return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 		ceph_encode_32(p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) static const unsigned char metric_bits[] = CEPHFS_METRIC_SPEC_CLIENT_SUPPORTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) #define METRIC_BYTES(cnt) (DIV_ROUND_UP((size_t)metric_bits[cnt - 1] + 1, 64) * 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) static int encode_metric_spec(void **p, void *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	static const size_t count = ARRAY_SIZE(metric_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	/* header */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 	if (WARN_ON_ONCE(*p + 2 > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 		return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	ceph_encode_8(p, 1); /* version */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	ceph_encode_8(p, 1); /* compat */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	if (count > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 		size_t i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 		size_t size = METRIC_BYTES(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 		if (WARN_ON_ONCE(*p + 4 + 4 + size > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 			return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 		/* metric spec info length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		ceph_encode_32(p, 4 + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 		/* metric spec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		ceph_encode_32(p, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 		memset(*p, 0, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 		for (i = 0; i < count; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 			((unsigned char *)(*p))[i / 8] |= BIT(metric_bits[i] % 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 		*p += size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 		if (WARN_ON_ONCE(*p + 4 + 4 > end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 			return -ERANGE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 		/* metric spec info length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 		ceph_encode_32(p, 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 		/* metric spec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 		ceph_encode_32(p, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)  * session message, specialization for CEPH_SESSION_REQUEST_OPEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250)  * to include additional client metadata fields.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	struct ceph_mds_session_head *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	int i = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	int extra_bytes = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	int metadata_key_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	struct ceph_options *opt = mdsc->fsc->client->options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	struct ceph_mount_options *fsopt = mdsc->fsc->mount_options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	size_t size, count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	const char* metadata[][2] = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 		{"hostname", mdsc->nodename},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 		{"kernel_version", init_utsname()->release},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 		{"entity_id", opt->name ? : ""},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 		{"root", fsopt->server_path ? : "/"},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		{NULL, NULL}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	/* Calculate serialized length of metadata */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	extra_bytes = 4;  /* map length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	for (i = 0; metadata[i][0]; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 		extra_bytes += 8 + strlen(metadata[i][0]) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 			strlen(metadata[i][1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 		metadata_key_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	/* supported feature */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	count = ARRAY_SIZE(feature_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	if (count > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		size = FEATURE_BYTES(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	extra_bytes += 4 + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	/* metric spec */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	count = ARRAY_SIZE(metric_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	if (count > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		size = METRIC_BYTES(count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	extra_bytes += 2 + 4 + 4 + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 	/* Allocate the message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 			   GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 		pr_err("create_session_msg ENOMEM creating msg\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	h = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	h->seq = cpu_to_le64(seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	 * Serialize client metadata into waiting buffer space, using
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	 * the format that userspace expects for map<string, string>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	 * ClientSession messages with metadata are v4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	msg->hdr.version = cpu_to_le16(4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	msg->hdr.compat_version = cpu_to_le16(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	/* The write pointer, following the session_head structure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	p += sizeof(*h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	/* Number of entries in the map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	ceph_encode_32(&p, metadata_key_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	/* Two length-prefixed strings for each entry in the map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	for (i = 0; metadata[i][0]; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 		size_t const key_len = strlen(metadata[i][0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 		size_t const val_len = strlen(metadata[i][1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 		ceph_encode_32(&p, key_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 		memcpy(p, metadata[i][0], key_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 		p += key_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 		ceph_encode_32(&p, val_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 		memcpy(p, metadata[i][1], val_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 		p += val_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	ret = encode_supported_features(&p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 		pr_err("encode_supported_features failed!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 		ceph_msg_put(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 		return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	ret = encode_metric_spec(&p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 		pr_err("encode_metric_spec failed!\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 		ceph_msg_put(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 		return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	msg->front.iov_len = p - msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	return msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358)  * send session open request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360)  * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) static int __open_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 			  struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 	int mstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	/* wait for mds to go active? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 	mstate = ceph_mdsmap_get_state(mdsc->mdsmap, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	dout("open_session to mds%d (%s)\n", mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	     ceph_mds_state_name(mstate));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	session->s_state = CEPH_MDS_SESSION_OPENING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 	session->s_renew_requested = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 	/* send connect message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 	msg = create_session_open_msg(mdsc, session->s_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	if (IS_ERR(msg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 		return PTR_ERR(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385)  * open sessions for any export targets for the given mds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387)  * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) static struct ceph_mds_session *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) __open_export_target_session(struct ceph_mds_client *mdsc, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 	struct ceph_mds_session *session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 	session = __ceph_lookup_mds_session(mdsc, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	if (!session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 		session = register_session(mdsc, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 		if (IS_ERR(session))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 			return session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	if (session->s_state == CEPH_MDS_SESSION_NEW ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	    session->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 		ret = __open_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 			return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	return session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) struct ceph_mds_session *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) ceph_mdsc_open_export_target_session(struct ceph_mds_client *mdsc, int target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	struct ceph_mds_session *session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	dout("open_export_target_session to mds%d\n", target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	session = __open_export_target_session(mdsc, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 	return session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) static void __open_export_target_sessions(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 					  struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 	struct ceph_mds_info *mi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	struct ceph_mds_session *ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	int i, mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	if (mds >= mdsc->mdsmap->possible_max_rank)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 	mi = &mdsc->mdsmap->m_info[mds];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	dout("open_export_target_sessions for mds%d (%d targets)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 	     session->s_mds, mi->num_export_targets);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 	for (i = 0; i < mi->num_export_targets; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 		ts = __open_export_target_session(mdsc, mi->export_targets[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 		ceph_put_mds_session(ts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) void ceph_mdsc_open_export_target_sessions(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 					   struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	__open_export_target_sessions(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454)  * session caps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) static void detach_cap_releases(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 				struct list_head *target)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 	lockdep_assert_held(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	list_splice_init(&session->s_cap_releases, target);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	session->s_num_cap_releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 	dout("dispose_cap_releases mds%d\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) static void dispose_cap_releases(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 				 struct list_head *dispose)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	while (!list_empty(dispose)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 		struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 		/* zero out the in-progress message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 		cap = list_first_entry(dispose, struct ceph_cap, session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 		list_del(&cap->session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 		ceph_put_cap(mdsc, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) static void cleanup_session_requests(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 				     struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 	struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 	struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	dout("cleanup_session_requests mds%d\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 	while (!list_empty(&session->s_unsafe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 		req = list_first_entry(&session->s_unsafe,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 				       struct ceph_mds_request, r_unsafe_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 		pr_warn_ratelimited(" dropping unsafe request %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 				    req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 		if (req->r_target_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 			mapping_set_error(req->r_target_inode->i_mapping, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 		if (req->r_unsafe_dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 			mapping_set_error(req->r_unsafe_dir->i_mapping, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 		__unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	/* zero r_attempts, so kick_requests() will re-send requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	p = rb_first(&mdsc->request_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 		req = rb_entry(p, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 		p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 		if (req->r_session &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 		    req->r_session->s_mds == session->s_mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 			req->r_attempts = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)  * Helper to safely iterate over all caps associated with a session, with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512)  * special care taken to handle a racing __ceph_remove_cap().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514)  * Caller must hold session s_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) int ceph_iterate_session_caps(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 			      int (*cb)(struct inode *, struct ceph_cap *,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 					void *), void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 	struct list_head *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 	struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 	struct inode *inode, *last_inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 	struct ceph_cap *old_cap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 	dout("iterate_session_caps %p mds%d\n", session, session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	p = session->s_caps.next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 	while (p != &session->s_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 		cap = list_entry(p, struct ceph_cap, session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 		inode = igrab(&cap->ci->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 		if (!inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 			p = p->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 		session->s_cap_iterator = cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 		spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 		if (last_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 			/* avoid calling iput_final() while holding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 			 * s_mutex or in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 			ceph_async_iput(last_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 			last_inode = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 		if (old_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 			ceph_put_cap(session->s_mdsc, old_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 			old_cap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 		ret = cb(inode, cap, arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 		last_inode = inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 		spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 		p = p->next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 		if (!cap->ci) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 			dout("iterate_session_caps  finishing cap %p removal\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 			     cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 			BUG_ON(cap->session != session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 			cap->session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 			list_del_init(&cap->session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 			session->s_nr_caps--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 			atomic64_dec(&session->s_mdsc->metric.total_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 			if (cap->queue_release)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 				__ceph_queue_cap_release(session, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 				old_cap = cap;  /* put_cap it w/o locks held */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 	ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 	session->s_cap_iterator = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) 	spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) 	ceph_async_iput(last_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 	if (old_cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) 		ceph_put_cap(session->s_mdsc, old_cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) static int remove_capsnaps(struct ceph_mds_client *mdsc, struct inode *inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	struct ceph_cap_snap *capsnap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 	int capsnap_release = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 	lockdep_assert_held(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) 	dout("removing capsnaps, ci is %p, inode is %p\n", ci, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 	while (!list_empty(&ci->i_cap_snaps)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 		capsnap = list_first_entry(&ci->i_cap_snaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 					   struct ceph_cap_snap, ci_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 		__ceph_remove_capsnap(inode, capsnap, NULL, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 		ceph_put_snap_context(capsnap->context);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 		ceph_put_cap_snap(capsnap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 		capsnap_release++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 	wake_up_all(&mdsc->cap_flushing_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 	return capsnap_release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 				  void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 	struct ceph_fs_client *fsc = (struct ceph_fs_client *)arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 	struct ceph_mds_client *mdsc = fsc->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	LIST_HEAD(to_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	bool dirty_dropped = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 	bool invalidate = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 	int capsnap_release = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 	dout("removing cap %p, ci is %p, inode is %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	     cap, ci, &ci->vfs_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	__ceph_remove_cap(cap, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	if (!ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 		struct ceph_cap_flush *cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) 		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 			if (inode->i_data.nrpages > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 				invalidate = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 			if (ci->i_wrbuffer_ref > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 				mapping_set_error(&inode->i_data, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 		while (!list_empty(&ci->i_cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 			cf = list_first_entry(&ci->i_cap_flush_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 					      struct ceph_cap_flush, i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 			list_move(&cf->i_list, &to_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 		spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) 		list_for_each_entry(cf, &to_remove, i_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 			list_del_init(&cf->g_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 		if (!list_empty(&ci->i_dirty_item)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 			pr_warn_ratelimited(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 				" dropping dirty %s state for %p %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 				ceph_cap_string(ci->i_dirty_caps),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 				inode, ceph_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 			ci->i_dirty_caps = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 			list_del_init(&ci->i_dirty_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 			dirty_dropped = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) 		if (!list_empty(&ci->i_flushing_item)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 			pr_warn_ratelimited(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 				" dropping dirty+flushing %s state for %p %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 				ceph_cap_string(ci->i_flushing_caps),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 				inode, ceph_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 			ci->i_flushing_caps = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 			list_del_init(&ci->i_flushing_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 			mdsc->num_cap_flushing--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 			dirty_dropped = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 		spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 		if (dirty_dropped) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 			mapping_set_error(inode->i_mapping, -EIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 			if (ci->i_wrbuffer_ref_head == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 			    ci->i_wr_ref == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 			    ci->i_dirty_caps == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 			    ci->i_flushing_caps == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 				ceph_put_snap_context(ci->i_head_snapc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 				ci->i_head_snapc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 		if (atomic_read(&ci->i_filelock_ref) > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 			/* make further file lock syscall return -EIO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 			ci->i_ceph_flags |= CEPH_I_ERROR_FILELOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 			pr_warn_ratelimited(" dropping file locks for %p %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 					    inode, ceph_ino(inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 		if (!ci->i_dirty_caps && ci->i_prealloc_cap_flush) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 			list_add(&ci->i_prealloc_cap_flush->i_list, &to_remove);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 			ci->i_prealloc_cap_flush = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 		if (!list_empty(&ci->i_cap_snaps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 			capsnap_release = remove_capsnaps(mdsc, inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 	while (!list_empty(&to_remove)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 		struct ceph_cap_flush *cf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 		cf = list_first_entry(&to_remove,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 				      struct ceph_cap_flush, i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 		list_del_init(&cf->i_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 		if (!cf->is_capsnap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 			ceph_free_cap_flush(cf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 	wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 	if (invalidate)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 		ceph_queue_invalidate(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 	if (dirty_dropped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 		iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	while (capsnap_release--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 		iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711)  * caller must hold session s_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) static void remove_session_caps(struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 	struct ceph_fs_client *fsc = session->s_mdsc->fsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 	struct super_block *sb = fsc->sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	LIST_HEAD(dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	dout("remove_session_caps on %p\n", session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	ceph_iterate_session_caps(session, remove_session_caps_cb, fsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	wake_up_all(&fsc->mdsc->cap_flushing_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	if (session->s_nr_caps > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 		struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 		struct ceph_cap *cap, *prev = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 		struct ceph_vino vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 		 * iterate_session_caps() skips inodes that are being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 		 * deleted, we need to wait until deletions are complete.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 		 * __wait_on_freeing_inode() is designed for the job,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 		 * but it is not exported, so use lookup inode function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 		 * to access it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 		while (!list_empty(&session->s_caps)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 			cap = list_entry(session->s_caps.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 					 struct ceph_cap, session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 			if (cap == prev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 			prev = cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 			vino = cap->ci->i_vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 			spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 			inode = ceph_find_inode(sb, vino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 			 /* avoid calling iput_final() while holding s_mutex */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 			ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 			spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	// drop cap expires and unlock s_cap_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	detach_cap_releases(session, &dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 	BUG_ON(session->s_nr_caps > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 	BUG_ON(!list_empty(&session->s_cap_flushing));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 	dispose_cap_releases(session->s_mdsc, &dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) enum {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 	RECONNECT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 	RENEWCAPS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 	FORCE_RO,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769)  * wake up any threads waiting on this session's caps.  if the cap is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770)  * old (didn't get renewed on the client reconnect), remove it now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)  * caller must hold s_mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) static int wake_up_session_cb(struct inode *inode, struct ceph_cap *cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 			      void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 	unsigned long ev = (unsigned long)arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	if (ev == RECONNECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 		spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 		ci->i_wanted_max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 		ci->i_requested_max_size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 		spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 	} else if (ev == RENEWCAPS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 		if (cap->cap_gen < cap->session->s_cap_gen) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 			/* mds did not re-issue stale cap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 			spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 			cap->issued = cap->implemented = CEPH_CAP_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 			spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 	} else if (ev == FORCE_RO) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 	wake_up_all(&ci->i_cap_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) static void wake_up_session_caps(struct ceph_mds_session *session, int ev)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 	dout("wake_up_session_caps %p mds%d\n", session, session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 	ceph_iterate_session_caps(session, wake_up_session_cb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 				  (void *)(unsigned long)ev);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806)  * Send periodic message to MDS renewing all currently held caps.  The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807)  * ack will reset the expiration for all caps from this session.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809)  * caller holds s_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) static int send_renew_caps(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 			   struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 	int state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 	if (time_after_eq(jiffies, session->s_cap_ttl) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 	    time_after_eq(session->s_cap_ttl, session->s_renew_requested))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 		pr_info("mds%d caps stale\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 	session->s_renew_requested = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 	/* do not try to renew caps until a recovering mds has reconnected
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	 * with its clients. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	state = ceph_mdsmap_get_state(mdsc->mdsmap, session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	if (state < CEPH_MDS_STATE_RECONNECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 		dout("send_renew_caps ignoring mds%d (%s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 		     session->s_mds, ceph_mds_state_name(state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 	dout("send_renew_caps to mds%d (%s)\n", session->s_mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 		ceph_mds_state_name(state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 	msg = create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 				 ++session->s_renew_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 	if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) static int send_flushmsg_ack(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 			     struct ceph_mds_session *session, u64 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	dout("send_flushmsg_ack to mds%d (%s)s seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 	     session->s_mds, ceph_session_state_name(session->s_state), seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	msg = create_session_msg(CEPH_SESSION_FLUSHMSG_ACK, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 	if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 	ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857)  * Note new cap ttl, and any transition from stale -> not stale (fresh?).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)  * Called under session->s_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) static void renewed_caps(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 			 struct ceph_mds_session *session, int is_renew)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	int was_stale;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 	int wake = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 	spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 	was_stale = is_renew && time_after_eq(jiffies, session->s_cap_ttl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 	session->s_cap_ttl = session->s_renew_requested +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 		mdsc->mdsmap->m_session_timeout*HZ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	if (was_stale) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 		if (time_before(jiffies, session->s_cap_ttl)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 			pr_info("mds%d caps renewed\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 			wake = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 			pr_info("mds%d caps still stale\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 	dout("renewed_caps mds%d ttl now %lu, was %s, now %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 	     session->s_mds, session->s_cap_ttl, was_stale ? "stale" : "fresh",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	     time_before(jiffies, session->s_cap_ttl) ? "stale" : "fresh");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 	spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 	if (wake)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 		wake_up_session_caps(session, RENEWCAPS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891)  * send a session close request
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) static int request_close_session(struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 	dout("request_close_session mds%d state %s seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 	     session->s_mds, ceph_session_state_name(session->s_state),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 	     session->s_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 	msg = create_session_msg(CEPH_SESSION_REQUEST_CLOSE, session->s_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 	if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 	ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908)  * Called with s_mutex held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) static int __close_session(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 			 struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 	if (session->s_state >= CEPH_MDS_SESSION_CLOSING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 	session->s_state = CEPH_MDS_SESSION_CLOSING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 	return request_close_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) static bool drop_negative_children(struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 	struct dentry *child;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 	bool all_negative = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 	if (!d_is_dir(dentry))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 	spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 	list_for_each_entry(child, &dentry->d_subdirs, d_child) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 		if (d_really_is_positive(child)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 			all_negative = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 	spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	if (all_negative)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 		shrink_dcache_parent(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 	return all_negative;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)  * Trim old(er) caps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945)  * Because we can't cache an inode without one or more caps, we do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946)  * this indirectly: if a cap is unused, we prune its aliases, at which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947)  * point the inode will hopefully get dropped to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)  * Yes, this is a bit sloppy.  Our only real goal here is to respond to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950)  * memory pressure from the MDS, though, so it needn't be perfect.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 	int *remaining = arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	struct ceph_inode_info *ci = ceph_inode(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 	int used, wanted, oissued, mine;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 	if (*remaining <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 	mine = cap->issued | cap->implemented;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 	used = __ceph_caps_used(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 	wanted = __ceph_caps_file_wanted(ci);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 	oissued = __ceph_caps_issued_other(ci, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 	dout("trim_caps_cb %p cap %p mine %s oissued %s used %s wanted %s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 	     inode, cap, ceph_cap_string(mine), ceph_cap_string(oissued),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 	     ceph_cap_string(used), ceph_cap_string(wanted));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 	if (cap == ci->i_auth_cap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 		if (ci->i_dirty_caps || ci->i_flushing_caps ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 		    !list_empty(&ci->i_cap_snaps))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 		if ((used | wanted) & CEPH_CAP_ANY_WR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 		/* Note: it's possible that i_filelock_ref becomes non-zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 		 * after dropping auth caps. It doesn't hurt because reply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 		 * of lock mds request will re-add auth caps. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 		if (atomic_read(&ci->i_filelock_ref) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 	/* The inode has cached pages, but it's no longer used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 	 * we can safely drop it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 	if (S_ISREG(inode->i_mode) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 	    wanted == 0 && used == CEPH_CAP_FILE_CACHE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 	    !(oissued & CEPH_CAP_FILE_CACHE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 	  used = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	  oissued = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	if ((used | wanted) & ~oissued & mine)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 		goto out;   /* we need these caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 	if (oissued) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 		/* we aren't the only cap.. just remove us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 		__ceph_remove_cap(cap, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 		(*remaining)--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 		struct dentry *dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 		/* try dropping referring dentries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 		spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 		dentry = d_find_any_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 		if (dentry && drop_negative_children(dentry)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 			int count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 			dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 			d_prune_aliases(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 			count = atomic_read(&inode->i_count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 			if (count == 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 				(*remaining)--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 			dout("trim_caps_cb %p cap %p pruned, count now %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 			     inode, cap, count);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 			dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023)  * Trim session cap count down to some max number.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) int ceph_trim_caps(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 		   struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 		   int max_caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 	int trim_caps = session->s_nr_caps - max_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	dout("trim_caps mds%d start: %d / %d, trim %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 	     session->s_mds, session->s_nr_caps, max_caps, trim_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	if (trim_caps > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 		int remaining = trim_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 		ceph_iterate_session_caps(session, trim_caps_cb, &remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 		dout("trim_caps mds%d done: %d / %d, trimmed %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 		     session->s_mds, session->s_nr_caps, max_caps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 			trim_caps - remaining);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 	ceph_flush_cap_releases(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) static int check_caps_flush(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 			    u64 want_flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 	int ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 	spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	if (!list_empty(&mdsc->cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 		struct ceph_cap_flush *cf =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 			list_first_entry(&mdsc->cap_flush_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 					 struct ceph_cap_flush, g_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 		if (cf->tid <= want_flush_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 			dout("check_caps_flush still flushing tid "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 			     "%llu <= %llu\n", cf->tid, want_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 			ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 	spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067)  * flush all dirty inode data to disk.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069)  * returns true if we've flushed through want_flush_tid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) static void wait_caps_flush(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 			    u64 want_flush_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	dout("check_caps_flush want %llu\n", want_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	wait_event(mdsc->cap_flushing_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 		   check_caps_flush(mdsc, want_flush_tid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 	dout("check_caps_flush ok, flushed thru %llu\n", want_flush_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083)  * called under s_mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) static void ceph_send_cap_releases(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 				   struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 	struct ceph_msg *msg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 	struct ceph_mds_cap_release *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 	struct ceph_mds_cap_item *item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 	struct ceph_osd_client *osdc = &mdsc->fsc->client->osdc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 	struct ceph_cap *cap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 	LIST_HEAD(tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 	int num_cap_releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 	__le32	barrier, *cap_barrier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 	down_read(&osdc->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 	barrier = cpu_to_le32(osdc->epoch_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 	up_read(&osdc->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 	spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 	list_splice_init(&session->s_cap_releases, &tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 	num_cap_releases = session->s_num_cap_releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 	session->s_num_cap_releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 	spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 	while (!list_empty(&tmp_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 		if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 			msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 					PAGE_SIZE, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 			if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 				goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 			head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 			head->num = cpu_to_le32(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 			msg->front.iov_len = sizeof(*head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 			msg->hdr.version = cpu_to_le16(2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 			msg->hdr.compat_version = cpu_to_le16(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 		cap = list_first_entry(&tmp_list, struct ceph_cap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 					session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 		list_del(&cap->session_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 		num_cap_releases--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 		head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 		put_unaligned_le32(get_unaligned_le32(&head->num) + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 				   &head->num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 		item = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 		item->ino = cpu_to_le64(cap->cap_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 		item->cap_id = cpu_to_le64(cap->cap_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 		item->migrate_seq = cpu_to_le32(cap->mseq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 		item->seq = cpu_to_le32(cap->issue_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 		msg->front.iov_len += sizeof(*item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 		ceph_put_cap(mdsc, cap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 		if (le32_to_cpu(head->num) == CEPH_CAPS_PER_RELEASE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 			// Append cap_barrier field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 			cap_barrier = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 			*cap_barrier = barrier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 			msg->front.iov_len += sizeof(*cap_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 			msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 			dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 			ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 			msg = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 	BUG_ON(num_cap_releases != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 	spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 	if (!list_empty(&session->s_cap_releases))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 		goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 	spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 	if (msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 		// Append cap_barrier field
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 		cap_barrier = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 		*cap_barrier = barrier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 		msg->front.iov_len += sizeof(*cap_barrier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 		dout("send_cap_releases mds%d %p\n", session->s_mds, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 		ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 	pr_err("send_cap_releases mds%d, failed to allocate message\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 		session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 	spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 	list_splice(&tmp_list, &session->s_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 	session->s_num_cap_releases += num_cap_releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 	spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) static void ceph_cap_release_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 	struct ceph_mds_session *session =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 		container_of(work, struct ceph_mds_session, s_cap_release_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 	mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 	if (session->s_state == CEPH_MDS_SESSION_OPEN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 	    session->s_state == CEPH_MDS_SESSION_HUNG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 		ceph_send_cap_releases(session->s_mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 	mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 	ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) void ceph_flush_cap_releases(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 		             struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 	if (mdsc->stopping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 	ceph_get_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 	if (queue_work(mdsc->fsc->cap_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 		       &session->s_cap_release_work)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 		dout("cap release work queued\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 		ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 		dout("failed to queue cap release work\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)  * caller holds session->s_cap_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) void __ceph_queue_cap_release(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 			      struct ceph_cap *cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 	list_add_tail(&cap->session_caps, &session->s_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 	session->s_num_cap_releases++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 	if (!(session->s_num_cap_releases % CEPH_CAPS_PER_RELEASE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 		ceph_flush_cap_releases(session->s_mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) static void ceph_cap_reclaim_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) 	struct ceph_mds_client *mdsc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) 		container_of(work, struct ceph_mds_client, cap_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) 	int ret = ceph_trim_dentries(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 	if (ret == -EAGAIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 		ceph_queue_cap_reclaim_work(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) void ceph_queue_cap_reclaim_work(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 	if (mdsc->stopping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235)         if (queue_work(mdsc->fsc->cap_wq, &mdsc->cap_reclaim_work)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236)                 dout("caps reclaim work queued\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237)         } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238)                 dout("failed to queue caps release work\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)         }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) void ceph_reclaim_caps_nr(struct ceph_mds_client *mdsc, int nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 	int val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 	if (!nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 	val = atomic_add_return(nr, &mdsc->cap_reclaim_pending);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 	if ((val % CEPH_CAPS_PER_RELEASE) < nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 		atomic_set(&mdsc->cap_reclaim_pending, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 		ceph_queue_cap_reclaim_work(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255)  * requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) int ceph_alloc_readdir_reply_buffer(struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 				    struct inode *dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 	struct ceph_inode_info *ci = ceph_inode(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 	struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 	struct ceph_mount_options *opt = req->r_mdsc->fsc->mount_options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 	size_t size = sizeof(struct ceph_mds_reply_dir_entry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 	unsigned int num_entries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 	int order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 	spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 	num_entries = ci->i_files + ci->i_subdirs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 	spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 	num_entries = max(num_entries, 1U);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 	num_entries = min(num_entries, opt->max_readdir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 	order = get_order(size * num_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 	while (order >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 		rinfo->dir_entries = (void*)__get_free_pages(GFP_KERNEL |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 							     __GFP_NOWARN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 							     order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 		if (rinfo->dir_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 		order--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 	if (!rinfo->dir_entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 	num_entries = (PAGE_SIZE << order) / size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 	num_entries = min(num_entries, opt->max_readdir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 	rinfo->dir_buf_size = PAGE_SIZE << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 	req->r_num_caps = num_entries + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 	req->r_args.readdir.max_entries = cpu_to_le32(num_entries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 	req->r_args.readdir.max_bytes = cpu_to_le32(opt->max_readdir_bytes);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297)  * Create an mds request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) struct ceph_mds_request *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	req = kmem_cache_zalloc(ceph_mds_request_cachep, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	if (!req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 	mutex_init(&req->r_fill_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 	req->r_mdsc = mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 	req->r_started = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 	req->r_start_latency = ktime_get();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 	req->r_resend_mds = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 	INIT_LIST_HEAD(&req->r_unsafe_dir_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 	INIT_LIST_HEAD(&req->r_unsafe_target_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	req->r_fmode = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 	kref_init(&req->r_kref);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 	RB_CLEAR_NODE(&req->r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 	INIT_LIST_HEAD(&req->r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 	init_completion(&req->r_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 	init_completion(&req->r_safe_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 	INIT_LIST_HEAD(&req->r_unsafe_item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 	ktime_get_coarse_real_ts64(&req->r_stamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 	req->r_op = op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 	req->r_direct_mode = mode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 	return req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)  * return oldest (lowest) request, tid in request tree, 0 if none.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333)  * called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) static struct ceph_mds_request *__get_oldest_req(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	if (RB_EMPTY_ROOT(&mdsc->request_tree))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 	return rb_entry(rb_first(&mdsc->request_tree),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 			struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) static inline  u64 __get_oldest_tid(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 	return mdsc->oldest_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349)  * Build a dentry's path.  Allocate on heap; caller must kfree.  Based
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350)  * on build_path_from_dentry in fs/cifs/dir.c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352)  * If @stop_on_nosnap, generate path relative to the first non-snapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353)  * inode.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)  * Encode hidden .snap dirs as a double /, i.e.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356)  *   foo/.snap/bar -> foo//bar
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 			   int stop_on_nosnap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 	struct dentry *temp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 	char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 	int pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 	unsigned seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 	u64 base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 	if (!dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 	path = __getname();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 	if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 	pos = PATH_MAX - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 	path[pos] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 	seq = read_seqbegin(&rename_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 	temp = dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 	for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 		struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 		spin_lock(&temp->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 		inode = d_inode(temp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 		if (inode && ceph_snap(inode) == CEPH_SNAPDIR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 			dout("build_path path+%d: %p SNAPDIR\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 			     pos, temp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 		} else if (stop_on_nosnap && inode && dentry != temp &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 			   ceph_snap(inode) == CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 			spin_unlock(&temp->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 			pos++; /* get rid of any prepended '/' */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 			pos -= temp->d_name.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 			if (pos < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 				spin_unlock(&temp->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 			memcpy(path + pos, temp->d_name.name, temp->d_name.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 		spin_unlock(&temp->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 		temp = READ_ONCE(temp->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 		/* Are we at the root? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 		if (IS_ROOT(temp))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 		/* Are we out of buffer? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 		if (--pos < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 		path[pos] = '/';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 	base = ceph_ino(d_inode(temp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 	if (read_seqretry(&rename_lock, seq))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 		goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 	if (pos < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 		 * A rename didn't occur, but somehow we didn't end up where
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 		 * we thought we would. Throw a warning and try again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 		pr_warn("build_path did not end path lookup where "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 			"expected, pos is %d\n", pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 		goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 	*pbase = base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 	*plen = PATH_MAX - 1 - pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 	dout("build_path on %p %d built %llx '%.*s'\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 	     dentry, d_count(dentry), base, *plen, path + pos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 	return path + pos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) static int build_dentry_path(struct dentry *dentry, struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 			     const char **ppath, int *ppathlen, u64 *pino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 			     bool *pfreepath, bool parent_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 	char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 	rcu_read_lock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 	if (!dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 		dir = d_inode_rcu(dentry->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 	if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 		*pino = ceph_ino(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 		rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 		*ppath = dentry->d_name.name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 		*ppathlen = dentry->d_name.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) 	rcu_read_unlock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 	path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 	if (IS_ERR(path))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 		return PTR_ERR(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 	*ppath = path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 	*pfreepath = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) static int build_inode_path(struct inode *inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 			    const char **ppath, int *ppathlen, u64 *pino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 			    bool *pfreepath)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 	struct dentry *dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 	char *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 	if (ceph_snap(inode) == CEPH_NOSNAP) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 		*pino = ceph_ino(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 		*ppathlen = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) 	dentry = d_find_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 	path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 	dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 	if (IS_ERR(path))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) 		return PTR_ERR(path);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 	*ppath = path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 	*pfreepath = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485)  * request arguments may be specified via an inode *, a dentry *, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)  * an explicit ino+path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 				  struct inode *rdiri, const char *rpath,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) 				  u64 rino, const char **ppath, int *pathlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 				  u64 *ino, bool *freepath, bool parent_locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) 	int r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 	if (rinode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 		r = build_inode_path(rinode, ppath, pathlen, ino, freepath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 		dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 		     ceph_snap(rinode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) 	} else if (rdentry) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) 		r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) 					freepath, parent_locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) 		dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) 		     *ppath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 	} else if (rpath || rino) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 		*ino = rino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) 		*ppath = rpath;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 		*pathlen = rpath ? strlen(rpath) : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) 		dout(" path %.*s\n", *pathlen, rpath);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) 	return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515)  * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 					       struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 					       int mds, bool drop_cap_releases)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 	struct ceph_mds_request_head *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 	const char *path1 = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 	const char *path2 = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 	u64 ino1 = 0, ino2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 	int pathlen1 = 0, pathlen2 = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 	bool freepath1 = false, freepath2 = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 	int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 	u16 releases;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 	void *p, *end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 	ret = set_request_path_attr(req->r_inode, req->r_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 			      req->r_parent, req->r_path1, req->r_ino1.ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 			      &path1, &pathlen1, &ino1, &freepath1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 			      test_bit(CEPH_MDS_R_PARENT_LOCKED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 					&req->r_req_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 		msg = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 	/* If r_old_dentry is set, then assume that its parent is locked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 	ret = set_request_path_attr(NULL, req->r_old_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 			      req->r_old_dentry_dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) 			      req->r_path2, req->r_ino2.ino,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) 			      &path2, &pathlen2, &ino2, &freepath2, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) 	if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) 		msg = ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) 		goto out_free1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) 	len = sizeof(*head) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) 		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) 		sizeof(struct ceph_timespec);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) 	/* calculate (max) length for cap releases */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 	len += sizeof(struct ceph_mds_request_release) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 		(!!req->r_inode_drop + !!req->r_dentry_drop +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) 		 !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 	if (req->r_dentry_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 		len += pathlen1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 	if (req->r_old_dentry_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) 		len += pathlen2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 	msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 	if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) 		msg = ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 		goto out_free2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 	msg->hdr.version = cpu_to_le16(2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 	msg->hdr.tid = cpu_to_le64(req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) 	head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 	p = msg->front.iov_base + sizeof(*head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) 	end = msg->front.iov_base + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) 	head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) 	head->op = cpu_to_le32(req->r_op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) 	head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) 	head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) 	head->ino = cpu_to_le64(req->r_deleg_ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 	head->args = req->r_args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 	ceph_encode_filepath(&p, end, ino1, path1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) 	ceph_encode_filepath(&p, end, ino2, path2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 	/* make note of release offset, in case we need to replay */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) 	req->r_request_release_offset = p - msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 	/* cap releases */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 	releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 	if (req->r_inode_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) 		releases += ceph_encode_inode_release(&p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 		      req->r_inode ? req->r_inode : d_inode(req->r_dentry),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) 		      mds, req->r_inode_drop, req->r_inode_unless,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) 		      req->r_op == CEPH_MDS_OP_READDIR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 	if (req->r_dentry_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 		releases += ceph_encode_dentry_release(&p, req->r_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 				req->r_parent, mds, req->r_dentry_drop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 				req->r_dentry_unless);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 	if (req->r_old_dentry_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 		releases += ceph_encode_dentry_release(&p, req->r_old_dentry,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 				req->r_old_dentry_dir, mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 				req->r_old_dentry_drop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 				req->r_old_dentry_unless);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 	if (req->r_old_inode_drop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 		releases += ceph_encode_inode_release(&p,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) 		      d_inode(req->r_old_dentry),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) 		      mds, req->r_old_inode_drop, req->r_old_inode_unless, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) 	if (drop_cap_releases) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) 		releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) 		p = msg->front.iov_base + req->r_request_release_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) 	head->num_releases = cpu_to_le16(releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) 	/* time stamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) 	{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) 		struct ceph_timespec ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) 		ceph_encode_timespec64(&ts, &req->r_stamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) 		ceph_encode_copy(&p, &ts, sizeof(ts));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) 	if (WARN_ON_ONCE(p > end)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) 		ceph_msg_put(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 		msg = ERR_PTR(-ERANGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 		goto out_free2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 	msg->front.iov_len = p - msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 	msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 	if (req->r_pagelist) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) 		struct ceph_pagelist *pagelist = req->r_pagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 		ceph_msg_data_add_pagelist(msg, pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) 		msg->hdr.data_len = cpu_to_le32(pagelist->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) 		msg->hdr.data_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 	msg->hdr.data_off = cpu_to_le16(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) out_free2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 	if (freepath2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 		ceph_mdsc_free_path((char *)path2, pathlen2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) out_free1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 	if (freepath1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 		ceph_mdsc_free_path((char *)path1, pathlen1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 	return msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657)  * called under mdsc->mutex if error, under no mutex if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658)  * success.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) static void complete_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) 			     struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) 	req->r_end_latency = ktime_get();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) 	if (req->r_callback)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 		req->r_callback(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) 	complete_all(&req->r_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671)  * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) static int __prepare_send_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 				  struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 				  int mds, bool drop_cap_releases)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 	struct ceph_mds_request_head *rhead;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) 	int flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 	req->r_attempts++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 	if (req->r_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 		struct ceph_cap *cap =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) 			ceph_get_cap_for_mds(ceph_inode(req->r_inode), mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) 		if (cap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) 			req->r_sent_on_mseq = cap->mseq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) 			req->r_sent_on_mseq = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) 	dout("prepare_send_request %p tid %lld %s (attempt %d)\n", req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) 	     req->r_tid, ceph_mds_op_name(req->r_op), req->r_attempts);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 	if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) 		void *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) 		 * Replay.  Do not regenerate message (and rebuild
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 		 * paths, etc.); just use the original message.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) 		 * Rebuilding paths will break for renames because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) 		 * d_move mangles the src name.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) 		msg = req->r_request;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) 		rhead = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) 		flags = le32_to_cpu(rhead->flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) 		flags |= CEPH_MDS_FLAG_REPLAY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) 		rhead->flags = cpu_to_le32(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) 		if (req->r_target_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 			rhead->ino = cpu_to_le64(ceph_ino(req->r_target_inode));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 		rhead->num_retry = req->r_attempts - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 		/* remove cap/dentry releases from message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 		rhead->num_releases = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) 		/* time stamp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) 		p = msg->front.iov_base + req->r_request_release_offset;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) 		{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) 			struct ceph_timespec ts;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) 			ceph_encode_timespec64(&ts, &req->r_stamp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) 			ceph_encode_copy(&p, &ts, sizeof(ts));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) 		msg->front.iov_len = p - msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) 		msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) 	if (req->r_request) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) 		ceph_msg_put(req->r_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) 		req->r_request = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) 	msg = create_request_message(mdsc, req, mds, drop_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) 	if (IS_ERR(msg)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 		req->r_err = PTR_ERR(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) 		return PTR_ERR(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) 	req->r_request = msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) 	rhead = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) 	rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) 	if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) 		flags |= CEPH_MDS_FLAG_REPLAY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) 	if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 		flags |= CEPH_MDS_FLAG_ASYNC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 	if (req->r_parent)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 		flags |= CEPH_MDS_FLAG_WANT_DENTRY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 	rhead->flags = cpu_to_le32(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) 	rhead->num_fwd = req->r_num_fwd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) 	rhead->num_retry = req->r_attempts - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) 	dout(" r_parent = %p\n", req->r_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758)  * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) static int __send_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) 			  struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) 			  struct ceph_mds_request *req,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) 			  bool drop_cap_releases)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) 	err = __prepare_send_request(mdsc, req, session->s_mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 				     drop_cap_releases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) 	if (!err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 		ceph_msg_get(req->r_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 		ceph_con_send(&session->s_con, req->r_request);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)  * send request, or put it on the appropriate wait list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) static void __do_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) 			struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) 	struct ceph_mds_session *session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) 	int mds = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) 	int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) 	bool random;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) 	if (req->r_err || test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 		if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 			__unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 	if (req->r_timeout &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 	    time_after_eq(jiffies, req->r_started + req->r_timeout)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 		dout("do_request timed out\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 		err = -ETIMEDOUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 		goto finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 		dout("do_request forced umount\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 		err = -EIO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 		goto finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_MOUNTING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 		if (mdsc->mdsmap_err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 			err = mdsc->mdsmap_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) 			dout("do_request mdsmap err %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 			goto finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) 		if (mdsc->mdsmap->m_epoch == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 			dout("do_request no mdsmap, waiting for map\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 			list_add(&req->r_wait, &mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 		if (!(mdsc->fsc->mount_options->flags &
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 		      CEPH_MOUNT_OPT_MOUNTWAIT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 		    !ceph_mdsmap_is_cluster_available(mdsc->mdsmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 			err = -EHOSTUNREACH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 			goto finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 	put_request_session(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 	mds = __choose_mds(mdsc, req, &random);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 	if (mds < 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 	    ceph_mdsmap_get_state(mdsc->mdsmap, mds) < CEPH_MDS_STATE_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) 		if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) 			err = -EJUKEBOX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) 			goto finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 		dout("do_request no mds or not active, waiting for map\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 		list_add(&req->r_wait, &mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) 	/* get, open session */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 	session = __ceph_lookup_mds_session(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) 	if (!session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 		session = register_session(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) 		if (IS_ERR(session)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 			err = PTR_ERR(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 			goto finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 	req->r_session = ceph_get_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 	dout("do_request mds%d session %p state %s\n", mds, session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) 	     ceph_session_state_name(session->s_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) 	    session->s_state != CEPH_MDS_SESSION_HUNG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) 		if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 			err = -EACCES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) 			goto out_session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) 		 * We cannot queue async requests since the caps and delegated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) 		 * inodes are bound to the session. Just return -EJUKEBOX and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) 		 * let the caller retry a sync request in that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) 		if (test_bit(CEPH_MDS_R_ASYNC, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) 			err = -EJUKEBOX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) 			goto out_session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) 		if (session->s_state == CEPH_MDS_SESSION_NEW ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) 		    session->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) 			err = __open_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) 			if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) 				goto out_session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) 			/* retry the same mds later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) 			if (random)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) 				req->r_resend_mds = mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) 		list_add(&req->r_wait, &session->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) 		goto out_session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) 	/* send request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) 	req->r_resend_mds = -1;   /* forget any previous mds hint */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) 	if (req->r_request_started == 0)   /* note request start time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) 		req->r_request_started = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) 	err = __send_request(mdsc, session, req, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) out_session:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) 	ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) finish:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) 	if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) 		dout("__do_request early error %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) 		req->r_err = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 		complete_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 		__unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900)  * called under mdsc->mutex
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) static void __wake_requests(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 			    struct list_head *head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 	struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) 	LIST_HEAD(tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) 	list_splice_init(head, &tmp_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) 	while (!list_empty(&tmp_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 		req = list_entry(tmp_list.next,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 				 struct ceph_mds_request, r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 		list_del_init(&req->r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 		dout(" wake request %p tid %llu\n", req, req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 		__do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920)  * Wake up threads with requests pending for @mds, so that they can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921)  * resubmit their requests to a possibly different mds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) static void kick_requests(struct ceph_mds_client *mdsc, int mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) 	struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) 	struct rb_node *p = rb_first(&mdsc->request_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) 	dout("kick_requests mds%d\n", mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 	while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) 		req = rb_entry(p, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) 		p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) 		if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) 		if (req->r_attempts > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) 			continue; /* only new requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) 		if (req->r_session &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) 		    req->r_session->s_mds == mds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) 			dout(" kicking tid %llu\n", req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) 			list_del_init(&req->r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 			__do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) int ceph_mdsc_submit_request(struct ceph_mds_client *mdsc, struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) 			      struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) 	int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 	/* take CAP_PIN refs for r_inode, r_parent, r_old_dentry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) 	if (req->r_inode)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) 		ceph_get_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) 	if (req->r_parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) 		struct ceph_inode_info *ci = ceph_inode(req->r_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) 		int fmode = (req->r_op & CEPH_MDS_OP_WRITE) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) 			    CEPH_FILE_MODE_WR : CEPH_FILE_MODE_RD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) 		spin_lock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) 		ceph_take_cap_refs(ci, CEPH_CAP_PIN, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) 		__ceph_touch_fmode(ci, mdsc, fmode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) 		spin_unlock(&ci->i_ceph_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) 		ihold(req->r_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) 	if (req->r_old_dentry_dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) 		ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) 				  CEPH_CAP_PIN);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) 	if (req->r_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) 		err = ceph_wait_on_async_create(req->r_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) 			dout("%s: wait for async create returned: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) 			     __func__, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) 			return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) 	if (!err && req->r_old_inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) 		err = ceph_wait_on_async_create(req->r_old_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) 			dout("%s: wait for async create returned: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) 			     __func__, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) 			return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) 	dout("submit_request on %p for inode %p\n", req, dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) 	__register_request(mdsc, req, dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) 	__do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) 	err = req->r_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) static int ceph_mdsc_wait_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) 				  struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) 	/* wait */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) 	dout("do_request waiting\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) 	if (!req->r_timeout && req->r_wait_for_completion) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) 		err = req->r_wait_for_completion(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) 		long timeleft = wait_for_completion_killable_timeout(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) 					&req->r_completion,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) 					ceph_timeout_jiffies(req->r_timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) 		if (timeleft > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) 			err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) 		else if (!timeleft)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) 			err = -ETIMEDOUT;  /* timed out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) 			err = timeleft;  /* killed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) 	dout("do_request waited, got %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) 	/* only abort if we didn't race with a real reply */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) 	if (test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) 		err = le32_to_cpu(req->r_reply_info.head->result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) 	} else if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) 		dout("aborted request %lld with %d\n", req->r_tid, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) 		 * ensure we aren't running concurrently with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) 		 * ceph_fill_trace or ceph_readdir_prepopulate, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) 		 * rely on locks (dir mutex) held by our caller.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) 		mutex_lock(&req->r_fill_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) 		req->r_err = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) 		set_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) 		mutex_unlock(&req->r_fill_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) 		if (req->r_parent &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) 		    (req->r_op & CEPH_MDS_OP_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) 			ceph_invalidate_dir_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) 		err = req->r_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045)  * Synchrously perform an mds request.  Take care of all of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046)  * session setup, forwarding, retry details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) int ceph_mdsc_do_request(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) 			 struct inode *dir,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) 			 struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) 	dout("do_request on %p\n", req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) 	/* issue */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) 	err = ceph_mdsc_submit_request(mdsc, dir, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) 	if (!err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) 		err = ceph_mdsc_wait_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) 	dout("do_request %p done, result %d\n", req, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065)  * Invalidate dir's completeness, dentry lease state on an aborted MDS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066)  * namespace request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) void ceph_invalidate_dir_request(struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) 	struct inode *dir = req->r_parent;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) 	struct inode *old_dir = req->r_old_dentry_dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3072) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3073) 	dout("invalidate_dir_request %p %p (complete, lease(s))\n", dir, old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3075) 	ceph_dir_clear_complete(dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3076) 	if (old_dir)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3077) 		ceph_dir_clear_complete(old_dir);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3078) 	if (req->r_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3079) 		ceph_invalidate_dentry_lease(req->r_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3080) 	if (req->r_old_dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3081) 		ceph_invalidate_dentry_lease(req->r_old_dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3084) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3085)  * Handle mds reply.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3086)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3087)  * We take the session mutex and parse and process the reply immediately.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3088)  * This preserves the logical ordering of replies, capabilities, etc., sent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3089)  * by the MDS as they are applied to our local cache.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3090)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3091) static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3092) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3093) 	struct ceph_mds_client *mdsc = session->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3094) 	struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3095) 	struct ceph_mds_reply_head *head = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3096) 	struct ceph_mds_reply_info_parsed *rinfo;  /* parsed reply info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3097) 	struct ceph_snap_realm *realm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3098) 	u64 tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3099) 	int err, result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3100) 	int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3102) 	if (msg->front.iov_len < sizeof(*head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3103) 		pr_err("mdsc_handle_reply got corrupt (short) reply\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3104) 		ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3105) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3106) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3108) 	/* get request, session */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3109) 	tid = le64_to_cpu(msg->hdr.tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3110) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3111) 	req = lookup_get_request(mdsc, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3112) 	if (!req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3113) 		dout("handle_reply on unknown tid %llu\n", tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3114) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3115) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3116) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3117) 	dout("handle_reply %p\n", req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3119) 	/* correct session? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3120) 	if (req->r_session != session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3121) 		pr_err("mdsc_handle_reply got %llu on session mds%d"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3122) 		       " not mds%d\n", tid, session->s_mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3123) 		       req->r_session ? req->r_session->s_mds : -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3124) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3125) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3126) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3127) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3128) 	/* dup? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3129) 	if ((test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags) && !head->safe) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3130) 	    (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags) && head->safe)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3131) 		pr_warn("got a dup %s reply on %llu from mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3132) 			   head->safe ? "safe" : "unsafe", tid, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3133) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3134) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3135) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3136) 	if (test_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3137) 		pr_warn("got unsafe after safe on %llu from mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3138) 			   tid, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3139) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3140) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3141) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3142) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3143) 	result = le32_to_cpu(head->result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3145) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3146) 	 * Handle an ESTALE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3147) 	 * if we're not talking to the authority, send to them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3148) 	 * if the authority has changed while we weren't looking,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3149) 	 * send to new authority
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3150) 	 * Otherwise we just have to return an ESTALE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3151) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3152) 	if (result == -ESTALE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3153) 		dout("got ESTALE on request %llu\n", req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3154) 		req->r_resend_mds = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3155) 		if (req->r_direct_mode != USE_AUTH_MDS) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3156) 			dout("not using auth, setting for that now\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3157) 			req->r_direct_mode = USE_AUTH_MDS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3158) 			__do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3159) 			mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3160) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3161) 		} else  {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3162) 			int mds = __choose_mds(mdsc, req, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3163) 			if (mds >= 0 && mds != req->r_session->s_mds) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3164) 				dout("but auth changed, so resending\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3165) 				__do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3166) 				mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3167) 				goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3168) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3169) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3170) 		dout("have to return ESTALE on request %llu\n", req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3171) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3174) 	if (head->safe) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3175) 		set_bit(CEPH_MDS_R_GOT_SAFE, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3176) 		__unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3178) 		/* last request during umount? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3179) 		if (mdsc->stopping && !__get_oldest_req(mdsc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3180) 			complete_all(&mdsc->safe_umount_waiters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3182) 		if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3183) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3184) 			 * We already handled the unsafe response, now do the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3185) 			 * cleanup.  No need to examine the response; the MDS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3186) 			 * doesn't include any result info in the safe
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3187) 			 * response.  And even if it did, there is nothing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3188) 			 * useful we could do with a revised return value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3189) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3190) 			dout("got safe reply %llu, mds%d\n", tid, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3192) 			mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3193) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3194) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3195) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3196) 		set_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3197) 		list_add_tail(&req->r_unsafe_item, &req->r_session->s_unsafe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3198) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3200) 	dout("handle_reply tid %lld result %d\n", tid, result);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3201) 	rinfo = &req->r_reply_info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3202) 	if (test_bit(CEPHFS_FEATURE_REPLY_ENCODING, &session->s_features))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3203) 		err = parse_reply_info(session, msg, rinfo, (u64)-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3204) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3205) 		err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3206) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3208) 	mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3209) 	if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3210) 		pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3211) 		ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3212) 		goto out_err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3213) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3215) 	/* snap trace */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3216) 	realm = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3217) 	if (rinfo->snapblob_len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3218) 		down_write(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3219) 		ceph_update_snap_trace(mdsc, rinfo->snapblob,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3220) 				rinfo->snapblob + rinfo->snapblob_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3221) 				le32_to_cpu(head->op) == CEPH_MDS_OP_RMSNAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3222) 				&realm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3223) 		downgrade_write(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3224) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3225) 		down_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3226) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3228) 	/* insert trace into our cache */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3229) 	mutex_lock(&req->r_fill_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3230) 	current->journal_info = req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3231) 	err = ceph_fill_trace(mdsc->fsc->sb, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3232) 	if (err == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3233) 		if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3234) 				    req->r_op == CEPH_MDS_OP_LSSNAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3235) 			ceph_readdir_prepopulate(req, req->r_session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3236) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3237) 	current->journal_info = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3238) 	mutex_unlock(&req->r_fill_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3240) 	up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3241) 	if (realm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3242) 		ceph_put_snap_realm(mdsc, realm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3243) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3244) 	if (err == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3245) 		if (req->r_target_inode &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3246) 		    test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3247) 			struct ceph_inode_info *ci =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3248) 				ceph_inode(req->r_target_inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3249) 			spin_lock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3250) 			list_add_tail(&req->r_unsafe_target_item,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3251) 				      &ci->i_unsafe_iops);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3252) 			spin_unlock(&ci->i_unsafe_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3253) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3255) 		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3256) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3257) out_err:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3258) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3259) 	if (!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3260) 		if (err) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3261) 			req->r_err = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3262) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3263) 			req->r_reply =  ceph_msg_get(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3264) 			set_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3265) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3266) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3267) 		dout("reply arrived after request %lld was aborted\n", tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3268) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3269) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3271) 	mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3273) 	/* kick calling process */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3274) 	complete_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3276) 	ceph_update_metadata_latency(&mdsc->metric, req->r_start_latency,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3277) 				     req->r_end_latency, err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3278) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3279) 	ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3280) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3285) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3286)  * handle mds notification that our request has been forwarded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3287)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3288) static void handle_forward(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3289) 			   struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3290) 			   struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3291) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3292) 	struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3293) 	u64 tid = le64_to_cpu(msg->hdr.tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3294) 	u32 next_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3295) 	u32 fwd_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3296) 	int err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3297) 	void *p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3298) 	void *end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3299) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3300) 	ceph_decode_need(&p, end, 2*sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3301) 	next_mds = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3302) 	fwd_seq = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3304) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3305) 	req = lookup_get_request(mdsc, tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3306) 	if (!req) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3307) 		dout("forward tid %llu to mds%d - req dne\n", tid, next_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3308) 		goto out;  /* dup reply? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3309) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3311) 	if (test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3312) 		dout("forward tid %llu aborted, unregistering\n", tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3313) 		__unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3314) 	} else if (fwd_seq <= req->r_num_fwd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3315) 		dout("forward tid %llu to mds%d - old seq %d <= %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3316) 		     tid, next_mds, req->r_num_fwd, fwd_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3317) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3318) 		/* resend. forward race not possible; mds would drop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3319) 		dout("forward tid %llu to mds%d (we resend)\n", tid, next_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3320) 		BUG_ON(req->r_err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3321) 		BUG_ON(test_bit(CEPH_MDS_R_GOT_RESULT, &req->r_req_flags));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3322) 		req->r_attempts = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3323) 		req->r_num_fwd = fwd_seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3324) 		req->r_resend_mds = next_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3325) 		put_request_session(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3326) 		__do_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3327) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3328) 	ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3329) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3330) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3331) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3332) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3333) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3334) 	pr_err("mdsc_handle_forward decode error err=%d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3335) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3337) static int __decode_session_metadata(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3338) 				     bool *blocklisted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3340) 	/* map<string,string> */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3341) 	u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3342) 	bool err_str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3343) 	ceph_decode_32_safe(p, end, n, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3344) 	while (n-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3345) 		u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3346) 		ceph_decode_32_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3347) 		ceph_decode_need(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3348) 		err_str = !strncmp(*p, "error_string", len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3349) 		*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3350) 		ceph_decode_32_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3351) 		ceph_decode_need(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3352) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3353) 		 * Match "blocklisted (blacklisted)" from newer MDSes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3354) 		 * or "blacklisted" from older MDSes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3355) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3356) 		if (err_str && strnstr(*p, "blacklisted", len))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3357) 			*blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3358) 		*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3359) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3360) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3361) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3362) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3363) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3365) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3366)  * handle a mds session control message
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3367)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3368) static void handle_session(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3369) 			   struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3370) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3371) 	struct ceph_mds_client *mdsc = session->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3372) 	int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3373) 	int msg_version = le16_to_cpu(msg->hdr.version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3374) 	void *p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3375) 	void *end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3376) 	struct ceph_mds_session_head *h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3377) 	u32 op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3378) 	u64 seq, features = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3379) 	int wake = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3380) 	bool blocklisted = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3382) 	/* decode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3383) 	ceph_decode_need(&p, end, sizeof(*h), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3384) 	h = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3385) 	p += sizeof(*h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3387) 	op = le32_to_cpu(h->op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3388) 	seq = le64_to_cpu(h->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3390) 	if (msg_version >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3391) 		u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3392) 		/* version >= 2, metadata */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3393) 		if (__decode_session_metadata(&p, end, &blocklisted) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3394) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3395) 		/* version >= 3, feature bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3396) 		ceph_decode_32_safe(&p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3397) 		if (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3398) 			ceph_decode_64_safe(&p, end, features, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3399) 			p += len - sizeof(features);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3400) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3401) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3403) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3404) 	if (op == CEPH_SESSION_CLOSE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3405) 		ceph_get_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3406) 		__unregister_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3407) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3408) 	/* FIXME: this ttl calculation is generous */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3409) 	session->s_ttl = jiffies + HZ*mdsc->mdsmap->m_session_autoclose;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3410) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3412) 	mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3414) 	dout("handle_session mds%d %s %p state %s seq %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3415) 	     mds, ceph_session_op_name(op), session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3416) 	     ceph_session_state_name(session->s_state), seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3417) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3418) 	if (session->s_state == CEPH_MDS_SESSION_HUNG) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3419) 		session->s_state = CEPH_MDS_SESSION_OPEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3420) 		pr_info("mds%d came back\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3421) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3422) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3423) 	switch (op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3424) 	case CEPH_SESSION_OPEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3425) 		if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3426) 			pr_info("mds%d reconnect success\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3427) 		session->s_state = CEPH_MDS_SESSION_OPEN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3428) 		session->s_features = features;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3429) 		renewed_caps(mdsc, session, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3430) 		if (test_bit(CEPHFS_FEATURE_METRIC_COLLECT, &session->s_features))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3431) 			metric_schedule_delayed(&mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3432) 		wake = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3433) 		if (mdsc->stopping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3434) 			__close_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3435) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3437) 	case CEPH_SESSION_RENEWCAPS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3438) 		if (session->s_renew_seq == seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3439) 			renewed_caps(mdsc, session, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3440) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3442) 	case CEPH_SESSION_CLOSE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3443) 		if (session->s_state == CEPH_MDS_SESSION_RECONNECTING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3444) 			pr_info("mds%d reconnect denied\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3445) 		session->s_state = CEPH_MDS_SESSION_CLOSED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3446) 		cleanup_session_requests(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3447) 		remove_session_caps(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3448) 		wake = 2; /* for good measure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3449) 		wake_up_all(&mdsc->session_close_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3450) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3451) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3452) 	case CEPH_SESSION_STALE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3453) 		pr_info("mds%d caps went stale, renewing\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3454) 			session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3455) 		spin_lock(&session->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3456) 		session->s_cap_gen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3457) 		session->s_cap_ttl = jiffies - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3458) 		spin_unlock(&session->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3459) 		send_renew_caps(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3460) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3462) 	case CEPH_SESSION_RECALL_STATE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3463) 		ceph_trim_caps(mdsc, session, le32_to_cpu(h->max_caps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3464) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3466) 	case CEPH_SESSION_FLUSHMSG:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3467) 		send_flushmsg_ack(mdsc, session, seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3468) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3469) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3470) 	case CEPH_SESSION_FORCE_RO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3471) 		dout("force_session_readonly %p\n", session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3472) 		spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3473) 		session->s_readonly = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3474) 		spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3475) 		wake_up_session_caps(session, FORCE_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3476) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3477) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3478) 	case CEPH_SESSION_REJECT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3479) 		WARN_ON(session->s_state != CEPH_MDS_SESSION_OPENING);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3480) 		pr_info("mds%d rejected session\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3481) 		session->s_state = CEPH_MDS_SESSION_REJECTED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3482) 		cleanup_session_requests(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3483) 		remove_session_caps(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3484) 		if (blocklisted)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3485) 			mdsc->fsc->blocklisted = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3486) 		wake = 2; /* for good measure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3487) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3489) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3490) 		pr_err("mdsc_handle_session bad op %d mds%d\n", op, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3491) 		WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3492) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3493) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3494) 	mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3495) 	if (wake) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3496) 		mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3497) 		__wake_requests(mdsc, &session->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3498) 		if (wake == 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3499) 			kick_requests(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3500) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3501) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3502) 	if (op == CEPH_SESSION_CLOSE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3503) 		ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3504) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3506) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3507) 	pr_err("mdsc_handle_session corrupt message mds%d len %d\n", mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3508) 	       (int)msg->front.iov_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3509) 	ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3510) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3511) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3513) void ceph_mdsc_release_dir_caps(struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3515) 	int dcaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3517) 	dcaps = xchg(&req->r_dir_caps, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3518) 	if (dcaps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3519) 		dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3520) 		ceph_put_cap_refs(ceph_inode(req->r_parent), dcaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3521) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3522) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3524) void ceph_mdsc_release_dir_caps_no_check(struct ceph_mds_request *req)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3525) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3526) 	int dcaps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3527) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3528) 	dcaps = xchg(&req->r_dir_caps, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3529) 	if (dcaps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3530) 		dout("releasing r_dir_caps=%s\n", ceph_cap_string(dcaps));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3531) 		ceph_put_cap_refs_no_check_caps(ceph_inode(req->r_parent),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3532) 						dcaps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3533) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3536) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3537)  * called under session->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3538)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3539) static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3540) 				   struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3541) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3542) 	struct ceph_mds_request *req, *nreq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3543) 	struct rb_node *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3544) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3545) 	dout("replay_unsafe_requests mds%d\n", session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3546) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3547) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3548) 	list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3549) 		__send_request(mdsc, session, req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3550) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3551) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3552) 	 * also re-send old requests when MDS enters reconnect stage. So that MDS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3553) 	 * can process completed request in clientreplay stage.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3554) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3555) 	p = rb_first(&mdsc->request_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3556) 	while (p) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3557) 		req = rb_entry(p, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3558) 		p = rb_next(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3559) 		if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3560) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3561) 		if (req->r_attempts == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3562) 			continue; /* only old requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3563) 		if (!req->r_session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3564) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3565) 		if (req->r_session->s_mds != session->s_mds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3566) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3568) 		ceph_mdsc_release_dir_caps_no_check(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3570) 		__send_request(mdsc, session, req, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3571) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3572) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3573) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3574) 
/*
 * Flush the partially-built reconnect pagelist to the MDS and install a
 * fresh one in recon_state, so encoding can continue in a new message.
 * Used when the accumulated data would exceed RECONNECT_MAX_SIZE; only
 * valid when the MDS supports multiple reconnect messages
 * (recon_state->allow_multi).
 *
 * Returns 0 on success (recon_state->pagelist replaced, counters reset)
 * or a negative errno; on failure the original pagelist is untouched.
 */
static int send_reconnect_partial(struct ceph_reconnect_state *recon_state)
{
	struct ceph_msg *reply;
	struct ceph_pagelist *_pagelist;
	struct page *page;
	__le32 *addr;
	int err = -ENOMEM;

	if (!recon_state->allow_multi)
		return -ENOSPC;

	/* can't handle message that contains both caps and realm */
	BUG_ON(!recon_state->nr_caps == !recon_state->nr_realms);

	/* pre-allocate new pagelist */
	_pagelist = ceph_pagelist_alloc(GFP_NOFS);
	if (!_pagelist)
		return -ENOMEM;

	reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false);
	if (!reply)
		goto fail_msg;

	/* placeholder for nr_caps */
	err = ceph_pagelist_encode_32(_pagelist, 0);
	if (err < 0)
		goto fail;

	if (recon_state->nr_caps) {
		/* currently encoding caps */
		err = ceph_pagelist_encode_32(recon_state->pagelist, 0);
		if (err)
			goto fail;
	} else {
		/* placeholder for nr_realms (currently encoding realms) */
		err = ceph_pagelist_encode_32(_pagelist, 0);
		if (err < 0)
			goto fail;
	}

	/* trailing flag byte = 1: more reconnect messages will follow */
	err = ceph_pagelist_encode_8(recon_state->pagelist, 1);
	if (err)
		goto fail;

	/*
	 * Patch the real cap/realm count over the placeholder written at
	 * the start of the outgoing pagelist's first page.  Word 0 holds
	 * nr_caps; word 1 holds nr_realms.
	 */
	page = list_first_entry(&recon_state->pagelist->head, struct page, lru);
	addr = kmap_atomic(page);
	if (recon_state->nr_caps) {
		/* currently encoding caps */
		*addr = cpu_to_le32(recon_state->nr_caps);
	} else {
		/* currently encoding realms */
		*(addr + 1) = cpu_to_le32(recon_state->nr_realms);
	}
	kunmap_atomic(addr);

	reply->hdr.version = cpu_to_le16(5);
	reply->hdr.compat_version = cpu_to_le16(4);

	reply->hdr.data_len = cpu_to_le32(recon_state->pagelist->length);
	ceph_msg_data_add_pagelist(reply, recon_state->pagelist);

	ceph_con_send(&recon_state->session->s_con, reply);
	ceph_pagelist_release(recon_state->pagelist);

	/* continue encoding into the fresh, pre-seeded pagelist */
	recon_state->pagelist = _pagelist;
	recon_state->nr_caps = 0;
	recon_state->nr_realms = 0;
	recon_state->msg_version = 5;
	return 0;
fail:
	ceph_msg_put(reply);
fail_msg:
	ceph_pagelist_release(_pagelist);
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3650) 
/*
 * Find the "primary" dentry alias for an inode: for a directory, its
 * single non-root alias; otherwise, the first hashed alias flagged
 * CEPH_DENTRY_PRIMARY_LINK.
 *
 * Returns a referenced dentry (caller must dput()) or NULL if no
 * suitable alias exists.
 */
static struct dentry* d_find_primary(struct inode *inode)
{
	struct dentry *alias, *dn = NULL;

	/* unlocked fast path: no aliases at all */
	if (hlist_empty(&inode->i_dentry))
		return NULL;

	spin_lock(&inode->i_lock);
	/* re-check under i_lock; list may have emptied meanwhile */
	if (hlist_empty(&inode->i_dentry))
		goto out_unlock;

	if (S_ISDIR(inode->i_mode)) {
		/* a directory has at most one alias; skip the root */
		alias = hlist_entry(inode->i_dentry.first, struct dentry, d_u.d_alias);
		if (!IS_ROOT(alias))
			dn = dget(alias);
		goto out_unlock;
	}

	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
		spin_lock(&alias->d_lock);
		if (!d_unhashed(alias) &&
		    (ceph_dentry(alias)->flags & CEPH_DENTRY_PRIMARY_LINK)) {
			/* grab the ref while still holding d_lock */
			dn = dget_dlock(alias);
		}
		spin_unlock(&alias->d_lock);
		if (dn)
			break;
	}
out_unlock:
	spin_unlock(&inode->i_lock);
	return dn;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3683) 
/*
 * Encode information about a cap for a reconnect with the MDS.
 *
 * Per-cap callback used while building the CEPH_MSG_CLIENT_RECONNECT
 * payload: appends one cap record (v1 or v2 format, depending on
 * recon_state->msg_version) to recon_state->pagelist, resetting the
 * cap's sequence numbers along the way.  May flush a partial message
 * via send_reconnect_partial() when the pagelist would grow past
 * RECONNECT_MAX_SIZE.  Returns 0 on success or a negative errno.
 */
static int reconnect_caps_cb(struct inode *inode, struct ceph_cap *cap,
			  void *arg)
{
	union {
		struct ceph_mds_cap_reconnect v2;
		struct ceph_mds_cap_reconnect_v1 v1;
	} rec;
	struct ceph_inode_info *ci = cap->ci;
	struct ceph_reconnect_state *recon_state = arg;
	struct ceph_pagelist *pagelist = recon_state->pagelist;
	struct dentry *dentry;
	char *path;
	int pathlen = 0, err;
	u64 pathbase;
	u64 snap_follows;

	dout(" adding %p ino %llx.%llx cap %p %lld %s\n",
	     inode, ceph_vinop(inode), cap, cap->cap_id,
	     ceph_cap_string(cap->issued));

	dentry = d_find_primary(inode);
	if (dentry) {
		/* set pathbase to parent dir when msg_version >= 2 */
		path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase,
					    recon_state->msg_version >= 2);
		dput(dentry);
		if (IS_ERR(path)) {
			err = PTR_ERR(path);
			goto out_err;
		}
	} else {
		/* no primary alias: send an empty path */
		path = NULL;
		pathbase = 0;
	}

	spin_lock(&ci->i_ceph_lock);
	cap->seq = 0;        /* reset cap seq */
	cap->issue_seq = 0;  /* and issue_seq */
	cap->mseq = 0;       /* and migrate_seq */
	cap->cap_gen = cap->session->s_cap_gen;

	/* These are lost when the session goes away */
	if (S_ISDIR(inode->i_mode)) {
		if (cap->issued & CEPH_CAP_DIR_CREATE) {
			ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns));
			memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout));
		}
		cap->issued &= ~CEPH_CAP_ANY_DIR_OPS;
	}

	/* fill in the wire record while still holding i_ceph_lock */
	if (recon_state->msg_version >= 2) {
		rec.v2.cap_id = cpu_to_le64(cap->cap_id);
		rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
		rec.v2.issued = cpu_to_le32(cap->issued);
		rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
		rec.v2.pathbase = cpu_to_le64(pathbase);
		/* 0/1 flag here; replaced by the real byte length below */
		rec.v2.flock_len = (__force __le32)
			((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1);
	} else {
		rec.v1.cap_id = cpu_to_le64(cap->cap_id);
		rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
		rec.v1.issued = cpu_to_le32(cap->issued);
		rec.v1.size = cpu_to_le64(inode->i_size);
		ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
		ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
		rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
		rec.v1.pathbase = cpu_to_le64(pathbase);
	}

	/* oldest snap context this cap's dirty data could follow */
	if (list_empty(&ci->i_cap_snaps)) {
		snap_follows = ci->i_head_snapc ? ci->i_head_snapc->seq : 0;
	} else {
		struct ceph_cap_snap *capsnap =
			list_first_entry(&ci->i_cap_snaps,
					 struct ceph_cap_snap, ci_item);
		snap_follows = capsnap->follows;
	}
	spin_unlock(&ci->i_ceph_lock);

	if (recon_state->msg_version >= 2) {
		int num_fcntl_locks, num_flock_locks;
		struct ceph_filelock *flocks = NULL;
		size_t struct_len, total_len = sizeof(u64);
		u8 struct_v = 0;

encode_again:
		/* retried if locks change between counting and encoding */
		if (rec.v2.flock_len) {
			ceph_count_locks(inode, &num_fcntl_locks, &num_flock_locks);
		} else {
			num_fcntl_locks = 0;
			num_flock_locks = 0;
		}
		if (num_fcntl_locks + num_flock_locks > 0) {
			flocks = kmalloc_array(num_fcntl_locks + num_flock_locks,
					       sizeof(struct ceph_filelock),
					       GFP_NOFS);
			if (!flocks) {
				err = -ENOMEM;
				goto out_err;
			}
			err = ceph_encode_locks_to_buffer(inode, flocks,
							  num_fcntl_locks,
							  num_flock_locks);
			if (err) {
				kfree(flocks);
				flocks = NULL;
				if (err == -ENOSPC)
					goto encode_again;
				goto out_err;
			}
		} else {
			kfree(flocks);
			flocks = NULL;
		}

		if (recon_state->msg_version >= 3) {
			/* version, compat_version and struct_len */
			total_len += 2 * sizeof(u8) + sizeof(u32);
			struct_v = 2;
		}
		/*
		 * number of encoded locks is stable, so copy to pagelist
		 */
		struct_len = 2 * sizeof(u32) +
			    (num_fcntl_locks + num_flock_locks) *
			    sizeof(struct ceph_filelock);
		rec.v2.flock_len = cpu_to_le32(struct_len);

		struct_len += sizeof(u32) + pathlen + sizeof(rec.v2);

		if (struct_v >= 2)
			struct_len += sizeof(u64); /* snap_follows */

		total_len += struct_len;

		/* flush current message if this record would overflow it */
		if (pagelist->length + total_len > RECONNECT_MAX_SIZE) {
			err = send_reconnect_partial(recon_state);
			if (err)
				goto out_freeflocks;
			pagelist = recon_state->pagelist;
		}

		err = ceph_pagelist_reserve(pagelist, total_len);
		if (err)
			goto out_freeflocks;

		ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
		if (recon_state->msg_version >= 3) {
			ceph_pagelist_encode_8(pagelist, struct_v);
			ceph_pagelist_encode_8(pagelist, 1);
			ceph_pagelist_encode_32(pagelist, struct_len);
		}
		ceph_pagelist_encode_string(pagelist, path, pathlen);
		ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2));
		ceph_locks_to_pagelist(flocks, pagelist,
				       num_fcntl_locks, num_flock_locks);
		if (struct_v >= 2)
			ceph_pagelist_encode_64(pagelist, snap_follows);
out_freeflocks:
		kfree(flocks);
	} else {
		/* v1 format: ino, path string, fixed-size record */
		err = ceph_pagelist_reserve(pagelist,
					    sizeof(u64) + sizeof(u32) +
					    pathlen + sizeof(rec.v1));
		if (err)
			goto out_err;

		ceph_pagelist_encode_64(pagelist, ceph_ino(inode));
		ceph_pagelist_encode_string(pagelist, path, pathlen);
		ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1));
	}

out_err:
	ceph_mdsc_free_path(path, pathlen);
	if (!err)
		recon_state->nr_caps++;
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3865) 
/*
 * Append all known snap realms to the reconnect pagelist.
 *
 * For msg_version >= 4 each realm record is wrapped in an encoding
 * envelope (struct_v, compat, len) and the message may be split via
 * send_reconnect_partial() if it would exceed RECONNECT_MAX_SIZE.
 * Returns 0 on success or a negative errno.
 */
static int encode_snap_realms(struct ceph_mds_client *mdsc,
			      struct ceph_reconnect_state *recon_state)
{
	struct rb_node *p;
	struct ceph_pagelist *pagelist = recon_state->pagelist;
	int err = 0;

	if (recon_state->msg_version >= 4) {
		/* realm count precedes the records in v4+ */
		err = ceph_pagelist_encode_32(pagelist, mdsc->num_snap_realms);
		if (err < 0)
			goto fail;
	}

	/*
	 * snaprealms.  we provide mds with the ino, seq (version), and
	 * parent for all of our realms.  If the mds has any newer info,
	 * it will tell us.
	 */
	for (p = rb_first(&mdsc->snap_realms); p; p = rb_next(p)) {
		struct ceph_snap_realm *realm =
		       rb_entry(p, struct ceph_snap_realm, node);
		struct ceph_mds_snaprealm_reconnect sr_rec;

		if (recon_state->msg_version >= 4) {
			size_t need = sizeof(u8) * 2 + sizeof(u32) +
				      sizeof(sr_rec);

			/* flush a partial message rather than overflow */
			if (pagelist->length + need > RECONNECT_MAX_SIZE) {
				err = send_reconnect_partial(recon_state);
				if (err)
					goto fail;
				pagelist = recon_state->pagelist;
			}

			err = ceph_pagelist_reserve(pagelist, need);
			if (err)
				goto fail;

			/* envelope: struct_v = 1, compat = 1, payload len */
			ceph_pagelist_encode_8(pagelist, 1);
			ceph_pagelist_encode_8(pagelist, 1);
			ceph_pagelist_encode_32(pagelist, sizeof(sr_rec));
		}

		dout(" adding snap realm %llx seq %lld parent %llx\n",
		     realm->ino, realm->seq, realm->parent_ino);
		sr_rec.ino = cpu_to_le64(realm->ino);
		sr_rec.seq = cpu_to_le64(realm->seq);
		sr_rec.parent = cpu_to_le64(realm->parent_ino);

		err = ceph_pagelist_append(pagelist, &sr_rec, sizeof(sr_rec));
		if (err)
			goto fail;

		recon_state->nr_realms++;
	}
fail:
	return err;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3924) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3926) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3927)  * If an MDS fails and recovers, clients need to reconnect in order to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3928)  * reestablish shared state.  This includes all caps issued through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3929)  * this session _and_ the snap_realm hierarchy.  Because it's not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3930)  * clear which snap realms the mds cares about, we send everything we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3931)  * know about.. that ensures we'll then get any new info the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3932)  * recovering MDS might have.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3933)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3934)  * This is a relatively heavyweight operation, but it's rare.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3935)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3936) static void send_mds_reconnect(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3937) 			       struct ceph_mds_session *session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3938) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3939) 	struct ceph_msg *reply;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3940) 	int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3941) 	int err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3942) 	struct ceph_reconnect_state recon_state = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3943) 		.session = session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3944) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3945) 	LIST_HEAD(dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3946) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3947) 	pr_info("mds%d reconnect start\n", mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3949) 	recon_state.pagelist = ceph_pagelist_alloc(GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3950) 	if (!recon_state.pagelist)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3951) 		goto fail_nopagelist;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3953) 	reply = ceph_msg_new2(CEPH_MSG_CLIENT_RECONNECT, 0, 1, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3954) 	if (!reply)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3955) 		goto fail_nomsg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3956) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3957) 	xa_destroy(&session->s_delegated_inos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3958) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3959) 	mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3960) 	session->s_state = CEPH_MDS_SESSION_RECONNECTING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3961) 	session->s_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3963) 	dout("session %p state %s\n", session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3964) 	     ceph_session_state_name(session->s_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3966) 	spin_lock(&session->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3967) 	session->s_cap_gen++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3968) 	spin_unlock(&session->s_gen_ttl_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3970) 	spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3971) 	/* don't know if session is readonly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3972) 	session->s_readonly = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3973) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3974) 	 * notify __ceph_remove_cap() that we are composing cap reconnect.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3975) 	 * If a cap get released before being added to the cap reconnect,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3976) 	 * __ceph_remove_cap() should skip queuing cap release.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3977) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3978) 	session->s_cap_reconnect = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3979) 	/* drop old cap expires; we're about to reestablish that state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3980) 	detach_cap_releases(session, &dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3981) 	spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3982) 	dispose_cap_releases(mdsc, &dispose);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3984) 	/* trim unused caps to reduce MDS's cache rejoin time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3985) 	if (mdsc->fsc->sb->s_root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3986) 		shrink_dcache_parent(mdsc->fsc->sb->s_root);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3987) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3988) 	ceph_con_close(&session->s_con);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3989) 	ceph_con_open(&session->s_con,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3990) 		      CEPH_ENTITY_TYPE_MDS, mds,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3991) 		      ceph_mdsmap_get_addr(mdsc->mdsmap, mds));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3992) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3993) 	/* replay unsafe requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3994) 	replay_unsafe_requests(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3996) 	ceph_early_kick_flushing_caps(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3998) 	down_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4000) 	/* placeholder for nr_caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4001) 	err = ceph_pagelist_encode_32(recon_state.pagelist, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4002) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4003) 		goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4005) 	if (test_bit(CEPHFS_FEATURE_MULTI_RECONNECT, &session->s_features)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4006) 		recon_state.msg_version = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4007) 		recon_state.allow_multi = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4008) 	} else if (session->s_con.peer_features & CEPH_FEATURE_MDSENC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4009) 		recon_state.msg_version = 3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4010) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4011) 		recon_state.msg_version = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4012) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4013) 	/* trsaverse this session's caps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4014) 	err = ceph_iterate_session_caps(session, reconnect_caps_cb, &recon_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4016) 	spin_lock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4017) 	session->s_cap_reconnect = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4018) 	spin_unlock(&session->s_cap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4020) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4021) 		goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4022) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4023) 	/* check if all realms can be encoded into current message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4024) 	if (mdsc->num_snap_realms) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4025) 		size_t total_len =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4026) 			recon_state.pagelist->length +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4027) 			mdsc->num_snap_realms *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4028) 			sizeof(struct ceph_mds_snaprealm_reconnect);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4029) 		if (recon_state.msg_version >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4030) 			/* number of realms */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4031) 			total_len += sizeof(u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4032) 			/* version, compat_version and struct_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4033) 			total_len += mdsc->num_snap_realms *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4034) 				     (2 * sizeof(u8) + sizeof(u32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4035) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4036) 		if (total_len > RECONNECT_MAX_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4037) 			if (!recon_state.allow_multi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4038) 				err = -ENOSPC;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4039) 				goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4040) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4041) 			if (recon_state.nr_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4042) 				err = send_reconnect_partial(&recon_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4043) 				if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4044) 					goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4045) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4046) 			recon_state.msg_version = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4047) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4048) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4049) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4050) 	err = encode_snap_realms(mdsc, &recon_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4051) 	if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4052) 		goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4053) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4054) 	if (recon_state.msg_version >= 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4055) 		err = ceph_pagelist_encode_8(recon_state.pagelist, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4056) 		if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4057) 			goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4058) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4059) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4060) 	if (recon_state.nr_caps || recon_state.nr_realms) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4061) 		struct page *page =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4062) 			list_first_entry(&recon_state.pagelist->head,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4063) 					struct page, lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4064) 		__le32 *addr = kmap_atomic(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4065) 		if (recon_state.nr_caps) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4066) 			WARN_ON(recon_state.nr_realms != mdsc->num_snap_realms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4067) 			*addr = cpu_to_le32(recon_state.nr_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4068) 		} else if (recon_state.msg_version >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4069) 			*(addr + 1) = cpu_to_le32(recon_state.nr_realms);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4070) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4071) 		kunmap_atomic(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4072) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4073) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4074) 	reply->hdr.version = cpu_to_le16(recon_state.msg_version);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4075) 	if (recon_state.msg_version >= 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4076) 		reply->hdr.compat_version = cpu_to_le16(4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4077) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4078) 	reply->hdr.data_len = cpu_to_le32(recon_state.pagelist->length);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4079) 	ceph_msg_data_add_pagelist(reply, recon_state.pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4081) 	ceph_con_send(&session->s_con, reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4083) 	mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4084) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4085) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4086) 	__wake_requests(mdsc, &session->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4087) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4088) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4089) 	up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4090) 	ceph_pagelist_release(recon_state.pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4091) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4092) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4093) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4094) 	ceph_msg_put(reply);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4095) 	up_read(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4096) 	mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4097) fail_nomsg:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4098) 	ceph_pagelist_release(recon_state.pagelist);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4099) fail_nopagelist:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4100) 	pr_err("error %d preparing reconnect for mds%d\n", err, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4101) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4105) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4106)  * compare old and new mdsmaps, kicking requests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4107)  * and closing out old connections as necessary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4108)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4109)  * called under mdsc->mutex.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4110)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4111) static void check_new_map(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4112) 			  struct ceph_mdsmap *newmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4113) 			  struct ceph_mdsmap *oldmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4114) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4115) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4116) 	int oldstate, newstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4117) 	struct ceph_mds_session *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4119) 	dout("check_new_map new %u old %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4120) 	     newmap->m_epoch, oldmap->m_epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4122) 	for (i = 0; i < oldmap->possible_max_rank && i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4123) 		if (!mdsc->sessions[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4124) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4125) 		s = mdsc->sessions[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4126) 		oldstate = ceph_mdsmap_get_state(oldmap, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4127) 		newstate = ceph_mdsmap_get_state(newmap, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4129) 		dout("check_new_map mds%d state %s%s -> %s%s (session %s)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4130) 		     i, ceph_mds_state_name(oldstate),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4131) 		     ceph_mdsmap_is_laggy(oldmap, i) ? " (laggy)" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4132) 		     ceph_mds_state_name(newstate),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4133) 		     ceph_mdsmap_is_laggy(newmap, i) ? " (laggy)" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4134) 		     ceph_session_state_name(s->s_state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4135) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4136) 		if (i >= newmap->possible_max_rank) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4137) 			/* force close session for stopped mds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4138) 			ceph_get_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4139) 			__unregister_session(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4140) 			__wake_requests(mdsc, &s->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4141) 			mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4142) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4143) 			mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4144) 			cleanup_session_requests(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4145) 			remove_session_caps(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4146) 			mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4147) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4148) 			ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4150) 			mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4151) 			kick_requests(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4152) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4153) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4155) 		if (memcmp(ceph_mdsmap_get_addr(oldmap, i),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4156) 			   ceph_mdsmap_get_addr(newmap, i),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4157) 			   sizeof(struct ceph_entity_addr))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4158) 			/* just close it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4159) 			mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4160) 			mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4161) 			mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4162) 			ceph_con_close(&s->s_con);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4163) 			mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4164) 			s->s_state = CEPH_MDS_SESSION_RESTARTING;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4165) 		} else if (oldstate == newstate) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4166) 			continue;  /* nothing new with this mds */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4167) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4168) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4169) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4170) 		 * send reconnect?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4171) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4172) 		if (s->s_state == CEPH_MDS_SESSION_RESTARTING &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4173) 		    newstate >= CEPH_MDS_STATE_RECONNECT) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4174) 			mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4175) 			send_mds_reconnect(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4176) 			mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4177) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4179) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4180) 		 * kick request on any mds that has gone active.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4181) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4182) 		if (oldstate < CEPH_MDS_STATE_ACTIVE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4183) 		    newstate >= CEPH_MDS_STATE_ACTIVE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4184) 			if (oldstate != CEPH_MDS_STATE_CREATING &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4185) 			    oldstate != CEPH_MDS_STATE_STARTING)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4186) 				pr_info("mds%d recovery completed\n", s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4187) 			kick_requests(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4188) 			mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4189) 			mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4190) 			mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4191) 			ceph_kick_flushing_caps(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4192) 			mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4193) 			wake_up_session_caps(s, RECONNECT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4194) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4195) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4197) 	for (i = 0; i < newmap->possible_max_rank && i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4198) 		s = mdsc->sessions[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4199) 		if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4200) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4201) 		if (!ceph_mdsmap_is_laggy(newmap, i))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4202) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4203) 		if (s->s_state == CEPH_MDS_SESSION_OPEN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4204) 		    s->s_state == CEPH_MDS_SESSION_HUNG ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4205) 		    s->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4206) 			dout(" connecting to export targets of laggy mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4207) 			     i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4208) 			__open_export_target_sessions(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4209) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4210) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4215) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4216)  * leases
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4217)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4219) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4220)  * caller must hold session s_mutex, dentry->d_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4221)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4222) void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4223) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4224) 	struct ceph_dentry_info *di = ceph_dentry(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4226) 	ceph_put_mds_session(di->lease_session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4227) 	di->lease_session = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4230) static void handle_lease(struct ceph_mds_client *mdsc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4231) 			 struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4232) 			 struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4234) 	struct super_block *sb = mdsc->fsc->sb;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4235) 	struct inode *inode;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4236) 	struct dentry *parent, *dentry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4237) 	struct ceph_dentry_info *di;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4238) 	int mds = session->s_mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4239) 	struct ceph_mds_lease *h = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4240) 	u32 seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4241) 	struct ceph_vino vino;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4242) 	struct qstr dname;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4243) 	int release = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4245) 	dout("handle_lease from mds%d\n", mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4247) 	/* decode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4248) 	if (msg->front.iov_len < sizeof(*h) + sizeof(u32))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4249) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4250) 	vino.ino = le64_to_cpu(h->ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4251) 	vino.snap = CEPH_NOSNAP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4252) 	seq = le32_to_cpu(h->seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4253) 	dname.len = get_unaligned_le32(h + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4254) 	if (msg->front.iov_len < sizeof(*h) + sizeof(u32) + dname.len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4255) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4256) 	dname.name = (void *)(h + 1) + sizeof(u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4258) 	/* lookup inode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4259) 	inode = ceph_find_inode(sb, vino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4260) 	dout("handle_lease %s, ino %llx %p %.*s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4261) 	     ceph_lease_op_name(h->action), vino.ino, inode,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4262) 	     dname.len, dname.name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4264) 	mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4265) 	inc_session_sequence(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4267) 	if (!inode) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4268) 		dout("handle_lease no inode %llx\n", vino.ino);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4269) 		goto release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4270) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4271) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4272) 	/* dentry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4273) 	parent = d_find_alias(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4274) 	if (!parent) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4275) 		dout("no parent dentry on inode %p\n", inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4276) 		WARN_ON(1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4277) 		goto release;  /* hrm... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4278) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4279) 	dname.hash = full_name_hash(parent, dname.name, dname.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4280) 	dentry = d_lookup(parent, &dname);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4281) 	dput(parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4282) 	if (!dentry)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4283) 		goto release;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4285) 	spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4286) 	di = ceph_dentry(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4287) 	switch (h->action) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4288) 	case CEPH_MDS_LEASE_REVOKE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4289) 		if (di->lease_session == session) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4290) 			if (ceph_seq_cmp(di->lease_seq, seq) > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4291) 				h->seq = cpu_to_le32(di->lease_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4292) 			__ceph_mdsc_drop_dentry_lease(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4293) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4294) 		release = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4295) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4297) 	case CEPH_MDS_LEASE_RENEW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4298) 		if (di->lease_session == session &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4299) 		    di->lease_gen == session->s_cap_gen &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4300) 		    di->lease_renew_from &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4301) 		    di->lease_renew_after == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4302) 			unsigned long duration =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4303) 				msecs_to_jiffies(le32_to_cpu(h->duration_ms));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4305) 			di->lease_seq = seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4306) 			di->time = di->lease_renew_from + duration;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4307) 			di->lease_renew_after = di->lease_renew_from +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4308) 				(duration >> 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4309) 			di->lease_renew_from = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4310) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4311) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4312) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4313) 	spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4314) 	dput(dentry);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4316) 	if (!release)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4317) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4319) release:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4320) 	/* let's just reuse the same message */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4321) 	h->action = CEPH_MDS_LEASE_REVOKE_ACK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4322) 	ceph_msg_get(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4323) 	ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4324) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4325) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4326) 	mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4327) 	/* avoid calling iput_final() in mds dispatch threads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4328) 	ceph_async_iput(inode);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4329) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4331) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4332) 	pr_err("corrupt lease message\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4333) 	ceph_msg_dump(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4334) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4336) void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4337) 			      struct dentry *dentry, char action,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4338) 			      u32 seq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4339) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4340) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4341) 	struct ceph_mds_lease *lease;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4342) 	struct inode *dir;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4343) 	int len = sizeof(*lease) + sizeof(u32) + NAME_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4345) 	dout("lease_send_msg identry %p %s to mds%d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4346) 	     dentry, ceph_lease_op_name(action), session->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4348) 	msg = ceph_msg_new(CEPH_MSG_CLIENT_LEASE, len, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4349) 	if (!msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4350) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4351) 	lease = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4352) 	lease->action = action;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4353) 	lease->seq = cpu_to_le32(seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4354) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4355) 	spin_lock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4356) 	dir = d_inode(dentry->d_parent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4357) 	lease->ino = cpu_to_le64(ceph_ino(dir));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4358) 	lease->first = lease->last = cpu_to_le64(ceph_snap(dir));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4360) 	put_unaligned_le32(dentry->d_name.len, lease + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4361) 	memcpy((void *)(lease + 1) + 4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4362) 	       dentry->d_name.name, dentry->d_name.len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4363) 	spin_unlock(&dentry->d_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4364) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4365) 	 * if this is a preemptive lease RELEASE, no need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4366) 	 * flush request stream, since the actual request will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4367) 	 * soon follow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4368) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4369) 	msg->more_to_follow = (action == CEPH_MDS_LEASE_RELEASE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4371) 	ceph_con_send(&session->s_con, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4373) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4374) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4375)  * lock unlock sessions, to wait ongoing session activities
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4376)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4377) static void lock_unlock_sessions(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4378) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4379) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4381) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4382) 	for (i = 0; i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4383) 		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4384) 		if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4385) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4386) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4387) 		mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4388) 		mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4389) 		ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4390) 		mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4391) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4392) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4393) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4394) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4395) static void maybe_recover_session(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4397) 	struct ceph_fs_client *fsc = mdsc->fsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4399) 	if (!ceph_test_mount_opt(fsc, CLEANRECOVER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4400) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4401) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4402) 	if (READ_ONCE(fsc->mount_state) != CEPH_MOUNT_MOUNTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4403) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4404) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4405) 	if (!READ_ONCE(fsc->blocklisted))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4406) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4408) 	if (fsc->last_auto_reconnect &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4409) 	    time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4410) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4412) 	pr_info("auto reconnect after blocklisted\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4413) 	fsc->last_auto_reconnect = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4414) 	ceph_force_reconnect(fsc->sb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4415) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4416) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4417) bool check_session_state(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4418) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4419) 	switch (s->s_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4420) 	case CEPH_MDS_SESSION_OPEN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4421) 		if (s->s_ttl && time_after(jiffies, s->s_ttl)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4422) 			s->s_state = CEPH_MDS_SESSION_HUNG;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4423) 			pr_info("mds%d hung\n", s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4424) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4425) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4426) 	case CEPH_MDS_SESSION_CLOSING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4427) 		/* Should never reach this when we're unmounting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4428) 		WARN_ON_ONCE(s->s_ttl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4429) 		fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4430) 	case CEPH_MDS_SESSION_NEW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4431) 	case CEPH_MDS_SESSION_RESTARTING:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4432) 	case CEPH_MDS_SESSION_CLOSED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4433) 	case CEPH_MDS_SESSION_REJECTED:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4434) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4435) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4437) 	return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4438) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4440) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4441)  * If the sequence is incremented while we're waiting on a REQUEST_CLOSE reply,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4442)  * then we need to retransmit that request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4443)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4444) void inc_session_sequence(struct ceph_mds_session *s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4445) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4446) 	lockdep_assert_held(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4447) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4448) 	s->s_seq++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4449) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4450) 	if (s->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4451) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4453) 		dout("resending session close request for mds%d\n", s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4454) 		ret = request_close_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4455) 		if (ret < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4456) 			pr_err("unable to close session to mds%d: %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4457) 			       s->s_mds, ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4458) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4459) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4461) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4462)  * delayed work -- periodically trim expired leases, renew caps with mds.  If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4463)  * the @delay parameter is set to 0 or if it's more than 5 secs, the default
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4464)  * workqueue delay value of 5 secs will be used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4465)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4466) static void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4468) 	unsigned long max_delay = HZ * 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4469) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4470) 	/* 5 secs default delay */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4471) 	if (!delay || (delay > max_delay))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4472) 		delay = max_delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4473) 	schedule_delayed_work(&mdsc->delayed_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4474) 			      round_jiffies_relative(delay));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4475) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4476) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4477) static void delayed_work(struct work_struct *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4479) 	struct ceph_mds_client *mdsc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4480) 		container_of(work, struct ceph_mds_client, delayed_work.work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4481) 	unsigned long delay;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4482) 	int renew_interval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4483) 	int renew_caps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4484) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4485) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4486) 	dout("mdsc delayed_work\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4487) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4488) 	if (mdsc->stopping)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4489) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4491) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4492) 	renew_interval = mdsc->mdsmap->m_session_timeout >> 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4493) 	renew_caps = time_after_eq(jiffies, HZ*renew_interval +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4494) 				   mdsc->last_renew_caps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4495) 	if (renew_caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4496) 		mdsc->last_renew_caps = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4498) 	for (i = 0; i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4499) 		struct ceph_mds_session *s = __ceph_lookup_mds_session(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4500) 		if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4501) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4503) 		if (!check_session_state(s)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4504) 			ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4505) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4506) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4507) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4508) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4509) 		mutex_lock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4510) 		if (renew_caps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4511) 			send_renew_caps(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4512) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4513) 			ceph_con_keepalive(&s->s_con);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4514) 		if (s->s_state == CEPH_MDS_SESSION_OPEN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4515) 		    s->s_state == CEPH_MDS_SESSION_HUNG)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4516) 			ceph_send_cap_releases(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4517) 		mutex_unlock(&s->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4518) 		ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4519) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4520) 		mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4521) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4522) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4524) 	delay = ceph_check_delayed_caps(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4526) 	ceph_queue_cap_reclaim_work(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4527) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4528) 	ceph_trim_snapid_map(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4529) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4530) 	maybe_recover_session(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4531) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4532) 	schedule_delayed(mdsc, delay);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4533) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4534) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4535) int ceph_mdsc_init(struct ceph_fs_client *fsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4536) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4538) 	struct ceph_mds_client *mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4539) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4541) 	mdsc = kzalloc(sizeof(struct ceph_mds_client), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4542) 	if (!mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4543) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4544) 	mdsc->fsc = fsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4545) 	mutex_init(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4546) 	mdsc->mdsmap = kzalloc(sizeof(*mdsc->mdsmap), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4547) 	if (!mdsc->mdsmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4548) 		err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4549) 		goto err_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4550) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4552) 	init_completion(&mdsc->safe_umount_waiters);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4553) 	init_waitqueue_head(&mdsc->session_close_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4554) 	INIT_LIST_HEAD(&mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4555) 	mdsc->sessions = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4556) 	atomic_set(&mdsc->num_sessions, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4557) 	mdsc->max_sessions = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4558) 	mdsc->stopping = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4559) 	atomic64_set(&mdsc->quotarealms_count, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4560) 	mdsc->quotarealms_inodes = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4561) 	mutex_init(&mdsc->quotarealms_inodes_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4562) 	mdsc->last_snap_seq = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4563) 	init_rwsem(&mdsc->snap_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4564) 	mdsc->snap_realms = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4565) 	INIT_LIST_HEAD(&mdsc->snap_empty);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4566) 	mdsc->num_snap_realms = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4567) 	spin_lock_init(&mdsc->snap_empty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4568) 	mdsc->last_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4569) 	mdsc->oldest_tid = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4570) 	mdsc->request_tree = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4571) 	INIT_DELAYED_WORK(&mdsc->delayed_work, delayed_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4572) 	mdsc->last_renew_caps = jiffies;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4573) 	INIT_LIST_HEAD(&mdsc->cap_delay_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4574) 	INIT_LIST_HEAD(&mdsc->cap_wait_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4575) 	spin_lock_init(&mdsc->cap_delay_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4576) 	INIT_LIST_HEAD(&mdsc->snap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4577) 	spin_lock_init(&mdsc->snap_flush_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4578) 	mdsc->last_cap_flush_tid = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4579) 	INIT_LIST_HEAD(&mdsc->cap_flush_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4580) 	INIT_LIST_HEAD(&mdsc->cap_dirty_migrating);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4581) 	mdsc->num_cap_flushing = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4582) 	spin_lock_init(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4583) 	init_waitqueue_head(&mdsc->cap_flushing_wq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4584) 	INIT_WORK(&mdsc->cap_reclaim_work, ceph_cap_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4585) 	atomic_set(&mdsc->cap_reclaim_pending, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4586) 	err = ceph_metric_init(&mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4587) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4588) 		goto err_mdsmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4590) 	spin_lock_init(&mdsc->dentry_list_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4591) 	INIT_LIST_HEAD(&mdsc->dentry_leases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4592) 	INIT_LIST_HEAD(&mdsc->dentry_dir_leases);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4593) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4594) 	ceph_caps_init(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4595) 	ceph_adjust_caps_max_min(mdsc, fsc->mount_options);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4597) 	spin_lock_init(&mdsc->snapid_map_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4598) 	mdsc->snapid_map_tree = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4599) 	INIT_LIST_HEAD(&mdsc->snapid_map_lru);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4601) 	init_rwsem(&mdsc->pool_perm_rwsem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4602) 	mdsc->pool_perm_tree = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4604) 	strscpy(mdsc->nodename, utsname()->nodename,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4605) 		sizeof(mdsc->nodename));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4607) 	fsc->mdsc = mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4608) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4609) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4610) err_mdsmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4611) 	kfree(mdsc->mdsmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4612) err_mdsc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4613) 	kfree(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4614) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4616) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4618)  * Wait for safe replies on open mds requests.  If we time out, drop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4619)  * all requests from the tree to avoid dangling dentry refs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4620)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4621) static void wait_requests(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4622) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4623) 	struct ceph_options *opts = mdsc->fsc->client->options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4624) 	struct ceph_mds_request *req;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4625) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4626) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4627) 	if (__get_oldest_req(mdsc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4628) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4630) 		dout("wait_requests waiting for requests\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4631) 		wait_for_completion_timeout(&mdsc->safe_umount_waiters,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4632) 				    ceph_timeout_jiffies(opts->mount_timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4634) 		/* tear down remaining requests */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4635) 		mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4636) 		while ((req = __get_oldest_req(mdsc))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4637) 			dout("wait_requests timed out on tid %llu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4638) 			     req->r_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4639) 			list_del_init(&req->r_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4640) 			__unregister_request(mdsc, req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4641) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4642) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4643) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4644) 	dout("wait_requests done\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4647) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4648)  * called before mount is ro, and before dentries are torn down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4649)  * (hmm, does this still race with new lookups?)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4650)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4651) void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4652) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4653) 	dout("pre_umount\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4654) 	mdsc->stopping = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4656) 	lock_unlock_sessions(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4657) 	ceph_flush_dirty_caps(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4658) 	wait_requests(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4660) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4661) 	 * wait for reply handlers to drop their request refs and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4662) 	 * their inode/dcache refs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4663) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4664) 	ceph_msgr_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4665) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4666) 	ceph_cleanup_quotarealms_inodes(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4667) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4669) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4670)  * wait for all write mds requests to flush.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4671)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4672) static void wait_unsafe_requests(struct ceph_mds_client *mdsc, u64 want_tid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4673) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4674) 	struct ceph_mds_request *req = NULL, *nextreq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4675) 	struct rb_node *n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4677) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4678) 	dout("wait_unsafe_requests want %lld\n", want_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4679) restart:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4680) 	req = __get_oldest_req(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4681) 	while (req && req->r_tid <= want_tid) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4682) 		/* find next request */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4683) 		n = rb_next(&req->r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4684) 		if (n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4685) 			nextreq = rb_entry(n, struct ceph_mds_request, r_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4686) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4687) 			nextreq = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4688) 		if (req->r_op != CEPH_MDS_OP_SETFILELOCK &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4689) 		    (req->r_op & CEPH_MDS_OP_WRITE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4690) 			/* write op */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4691) 			ceph_mdsc_get_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4692) 			if (nextreq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4693) 				ceph_mdsc_get_request(nextreq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4694) 			mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4695) 			dout("wait_unsafe_requests  wait on %llu (want %llu)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4696) 			     req->r_tid, want_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4697) 			wait_for_completion(&req->r_safe_completion);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4698) 			mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4699) 			ceph_mdsc_put_request(req);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4700) 			if (!nextreq)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4701) 				break;  /* next dne before, so we're done! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4702) 			if (RB_EMPTY_NODE(&nextreq->r_node)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4703) 				/* next request was removed from tree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4704) 				ceph_mdsc_put_request(nextreq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4705) 				goto restart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4706) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4707) 			ceph_mdsc_put_request(nextreq);  /* won't go away */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4708) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4709) 		req = nextreq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4710) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4711) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4712) 	dout("wait_unsafe_requests done\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4715) void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4716) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4717) 	u64 want_tid, want_flush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4718) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4719) 	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4720) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4722) 	dout("sync\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4723) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4724) 	want_tid = mdsc->last_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4725) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4727) 	ceph_flush_dirty_caps(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4728) 	spin_lock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4729) 	want_flush = mdsc->last_cap_flush_tid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4730) 	if (!list_empty(&mdsc->cap_flush_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4731) 		struct ceph_cap_flush *cf =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4732) 			list_last_entry(&mdsc->cap_flush_list,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4733) 					struct ceph_cap_flush, g_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4734) 		cf->wake = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4735) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4736) 	spin_unlock(&mdsc->cap_dirty_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4738) 	dout("sync want tid %lld flush_seq %lld\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4739) 	     want_tid, want_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4741) 	wait_unsafe_requests(mdsc, want_tid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4742) 	wait_caps_flush(mdsc, want_flush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4743) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4744) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4745) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4746)  * true if all sessions are closed, or we force unmount
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4747)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4748) static bool done_closing_sessions(struct ceph_mds_client *mdsc, int skipped)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4749) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4750) 	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4751) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4752) 	return atomic_read(&mdsc->num_sessions) <= skipped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4755) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4756)  * called after sb is ro.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4757)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4758) void ceph_mdsc_close_sessions(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4759) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4760) 	struct ceph_options *opts = mdsc->fsc->client->options;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4761) 	struct ceph_mds_session *session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4762) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4763) 	int skipped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4765) 	dout("close_sessions\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4767) 	/* close sessions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4768) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4769) 	for (i = 0; i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4770) 		session = __ceph_lookup_mds_session(mdsc, i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4771) 		if (!session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4772) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4773) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4774) 		mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4775) 		if (__close_session(mdsc, session) <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4776) 			skipped++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4777) 		mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4778) 		ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4779) 		mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4780) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4781) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4783) 	dout("waiting for sessions to close\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4784) 	wait_event_timeout(mdsc->session_close_wq,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4785) 			   done_closing_sessions(mdsc, skipped),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4786) 			   ceph_timeout_jiffies(opts->mount_timeout));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4788) 	/* tear down remaining sessions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4789) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4790) 	for (i = 0; i < mdsc->max_sessions; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4791) 		if (mdsc->sessions[i]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4792) 			session = ceph_get_mds_session(mdsc->sessions[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4793) 			__unregister_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4794) 			mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4795) 			mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4796) 			remove_session_caps(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4797) 			mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4798) 			ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4799) 			mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4800) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4801) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4802) 	WARN_ON(!list_empty(&mdsc->cap_delay_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4803) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4805) 	ceph_cleanup_snapid_map(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4806) 	ceph_cleanup_empty_realms(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4808) 	cancel_work_sync(&mdsc->cap_reclaim_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4809) 	cancel_delayed_work_sync(&mdsc->delayed_work); /* cancel timer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4811) 	dout("stopped\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4814) void ceph_mdsc_force_umount(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4815) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4816) 	struct ceph_mds_session *session;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4817) 	int mds;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4818) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4819) 	dout("force umount\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4821) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4822) 	for (mds = 0; mds < mdsc->max_sessions; mds++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4823) 		session = __ceph_lookup_mds_session(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4824) 		if (!session)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4825) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4826) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4827) 		if (session->s_state == CEPH_MDS_SESSION_REJECTED)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4828) 			__unregister_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4829) 		__wake_requests(mdsc, &session->s_waiting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4830) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4831) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4832) 		mutex_lock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4833) 		__close_session(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4834) 		if (session->s_state == CEPH_MDS_SESSION_CLOSING) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4835) 			cleanup_session_requests(mdsc, session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4836) 			remove_session_caps(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4837) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4838) 		mutex_unlock(&session->s_mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4839) 		ceph_put_mds_session(session);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4841) 		mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4842) 		kick_requests(mdsc, mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4843) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4844) 	__wake_requests(mdsc, &mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4845) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4846) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4848) static void ceph_mdsc_stop(struct ceph_mds_client *mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4849) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4850) 	dout("stop\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4851) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4852) 	 * Make sure the delayed work stopped before releasing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4853) 	 * the resources.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4854) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4855) 	 * Because the cancel_delayed_work_sync() will only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4856) 	 * guarantee that the work finishes executing. But the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4857) 	 * delayed work will re-arm itself again after that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4858) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4859) 	flush_delayed_work(&mdsc->delayed_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4860) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4861) 	if (mdsc->mdsmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4862) 		ceph_mdsmap_destroy(mdsc->mdsmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4863) 	kfree(mdsc->sessions);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4864) 	ceph_caps_finalize(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4865) 	ceph_pool_perm_destroy(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4867) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4868) void ceph_mdsc_destroy(struct ceph_fs_client *fsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4869) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4870) 	struct ceph_mds_client *mdsc = fsc->mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4871) 	dout("mdsc_destroy %p\n", mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4872) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4873) 	if (!mdsc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4874) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4875) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4876) 	/* flush out any connection work with references to us */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4877) 	ceph_msgr_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4879) 	ceph_mdsc_stop(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4880) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4881) 	ceph_metric_destroy(&mdsc->metric);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4882) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4883) 	fsc->mdsc = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4884) 	kfree(mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4885) 	dout("mdsc_destroy %p done\n", mdsc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4886) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4888) void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4889) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4890) 	struct ceph_fs_client *fsc = mdsc->fsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4891) 	const char *mds_namespace = fsc->mount_options->mds_namespace;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4892) 	void *p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4893) 	void *end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4894) 	u32 epoch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4895) 	u32 map_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4896) 	u32 num_fs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4897) 	u32 mount_fscid = (u32)-1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4898) 	u8 struct_v, struct_cv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4899) 	int err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4900) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4901) 	ceph_decode_need(&p, end, sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4902) 	epoch = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4904) 	dout("handle_fsmap epoch %u\n", epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4905) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4906) 	ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4907) 	struct_v = ceph_decode_8(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4908) 	struct_cv = ceph_decode_8(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4909) 	map_len = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4911) 	ceph_decode_need(&p, end, sizeof(u32) * 3, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4912) 	p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4913) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4914) 	num_fs = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4915) 	while (num_fs-- > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4916) 		void *info_p, *info_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4917) 		u32 info_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4918) 		u8 info_v, info_cv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4919) 		u32 fscid, namelen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4920) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4921) 		ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4922) 		info_v = ceph_decode_8(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4923) 		info_cv = ceph_decode_8(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4924) 		info_len = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4925) 		ceph_decode_need(&p, end, info_len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4926) 		info_p = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4927) 		info_end = p + info_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4928) 		p = info_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4929) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4930) 		ceph_decode_need(&info_p, info_end, sizeof(u32) * 2, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4931) 		fscid = ceph_decode_32(&info_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4932) 		namelen = ceph_decode_32(&info_p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4933) 		ceph_decode_need(&info_p, info_end, namelen, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4934) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4935) 		if (mds_namespace &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4936) 		    strlen(mds_namespace) == namelen &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4937) 		    !strncmp(mds_namespace, (char *)info_p, namelen)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4938) 			mount_fscid = fscid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4939) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4940) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4941) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4943) 	ceph_monc_got_map(&fsc->client->monc, CEPH_SUB_FSMAP, epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4944) 	if (mount_fscid != (u32)-1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4945) 		fsc->client->monc.fs_cluster_id = mount_fscid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4946) 		ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4947) 				   0, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4948) 		ceph_monc_renew_subs(&fsc->client->monc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4949) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4950) 		err = -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4951) 		goto err_out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4952) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4953) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4954) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4955) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4956) 	pr_err("error decoding fsmap\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4957) err_out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4958) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4959) 	mdsc->mdsmap_err = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4960) 	__wake_requests(mdsc, &mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4961) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4962) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4963) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4964) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4965)  * handle mds map update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4966)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4967) void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4968) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4969) 	u32 epoch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4970) 	u32 maplen;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4971) 	void *p = msg->front.iov_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4972) 	void *end = p + msg->front.iov_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4973) 	struct ceph_mdsmap *newmap, *oldmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4974) 	struct ceph_fsid fsid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4975) 	int err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4977) 	ceph_decode_need(&p, end, sizeof(fsid)+2*sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4978) 	ceph_decode_copy(&p, &fsid, sizeof(fsid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4979) 	if (ceph_check_fsid(mdsc->fsc->client, &fsid) < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4980) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4981) 	epoch = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4982) 	maplen = ceph_decode_32(&p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4983) 	dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4984) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4985) 	/* do we need it? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4986) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4987) 	if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4988) 		dout("handle_map epoch %u <= our %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4989) 		     epoch, mdsc->mdsmap->m_epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4990) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4991) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4992) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4994) 	newmap = ceph_mdsmap_decode(&p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4995) 	if (IS_ERR(newmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4996) 		err = PTR_ERR(newmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4997) 		goto bad_unlock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4998) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5000) 	/* swap into place */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5001) 	if (mdsc->mdsmap) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5002) 		oldmap = mdsc->mdsmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5003) 		mdsc->mdsmap = newmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5004) 		check_new_map(mdsc, newmap, oldmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5005) 		ceph_mdsmap_destroy(oldmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5006) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5007) 		mdsc->mdsmap = newmap;  /* first mds map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5008) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5009) 	mdsc->fsc->max_file_size = min((loff_t)mdsc->mdsmap->m_max_file_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5010) 					MAX_LFS_FILESIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5012) 	__wake_requests(mdsc, &mdsc->waiting_for_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5013) 	ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5014) 			  mdsc->mdsmap->m_epoch);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5016) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5017) 	schedule_delayed(mdsc, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5018) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5019) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5020) bad_unlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5021) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5022) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5023) 	pr_err("error decoding mdsmap %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5024) 	return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5026) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5027) static struct ceph_connection *con_get(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5028) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5029) 	struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5031) 	if (ceph_get_mds_session(s))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5032) 		return con;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5033) 	return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5034) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5035) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5036) static void con_put(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5038) 	struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5040) 	ceph_put_mds_session(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5041) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5042) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5043) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5044)  * if the client is unresponsive for long enough, the mds will kill
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5045)  * the session entirely.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5046)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5047) static void peer_reset(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5048) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5049) 	struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5050) 	struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5051) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5052) 	pr_warn("mds%d closed our session\n", s->s_mds);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5053) 	send_mds_reconnect(mdsc, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5054) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5055) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5056) static void dispatch(struct ceph_connection *con, struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5057) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5058) 	struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5059) 	struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5060) 	int type = le16_to_cpu(msg->hdr.type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5061) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5062) 	mutex_lock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5063) 	if (__verify_registered_session(mdsc, s) < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5064) 		mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5065) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5066) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5067) 	mutex_unlock(&mdsc->mutex);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5068) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5069) 	switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5070) 	case CEPH_MSG_MDS_MAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5071) 		ceph_mdsc_handle_mdsmap(mdsc, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5072) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5073) 	case CEPH_MSG_FS_MAP_USER:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5074) 		ceph_mdsc_handle_fsmap(mdsc, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5075) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5076) 	case CEPH_MSG_CLIENT_SESSION:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5077) 		handle_session(s, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5078) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5079) 	case CEPH_MSG_CLIENT_REPLY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5080) 		handle_reply(s, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5081) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5082) 	case CEPH_MSG_CLIENT_REQUEST_FORWARD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5083) 		handle_forward(mdsc, s, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5084) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5085) 	case CEPH_MSG_CLIENT_CAPS:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5086) 		ceph_handle_caps(s, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5087) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5088) 	case CEPH_MSG_CLIENT_SNAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5089) 		ceph_handle_snap(mdsc, s, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5090) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5091) 	case CEPH_MSG_CLIENT_LEASE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5092) 		handle_lease(mdsc, s, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5093) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5094) 	case CEPH_MSG_CLIENT_QUOTA:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5095) 		ceph_handle_quota(mdsc, s, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5096) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5098) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5099) 		pr_err("received unknown message type %d %s\n", type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5100) 		       ceph_msg_type_name(type));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5101) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5102) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5103) 	ceph_msg_put(msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5106) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5107)  * authentication
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5108)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5110) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5111)  * Note: returned pointer is the address of a structure that's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5112)  * managed separately.  Caller must *not* attempt to free it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5113)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5114) static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5115) 					int *proto, int force_new)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5116) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5117) 	struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5118) 	struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5119) 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5120) 	struct ceph_auth_handshake *auth = &s->s_auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5122) 	if (force_new && auth->authorizer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5123) 		ceph_auth_destroy_authorizer(auth->authorizer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5124) 		auth->authorizer = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5125) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5126) 	if (!auth->authorizer) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5127) 		int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5128) 						      auth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5129) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5130) 			return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5131) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5132) 		int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5133) 						      auth);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5134) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5135) 			return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5136) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5137) 	*proto = ac->protocol;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5139) 	return auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5140) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5142) static int add_authorizer_challenge(struct ceph_connection *con,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5143) 				    void *challenge_buf, int challenge_buf_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5145) 	struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5146) 	struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5147) 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5149) 	return ceph_auth_add_authorizer_challenge(ac, s->s_auth.authorizer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5150) 					    challenge_buf, challenge_buf_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5152) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5153) static int verify_authorizer_reply(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5154) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5155) 	struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5156) 	struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5157) 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5159) 	return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5162) static int invalidate_authorizer(struct ceph_connection *con)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5164) 	struct ceph_mds_session *s = con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5165) 	struct ceph_mds_client *mdsc = s->s_mdsc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5166) 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5168) 	ceph_auth_invalidate_authorizer(ac, CEPH_ENTITY_TYPE_MDS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5170) 	return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5173) static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5174) 				struct ceph_msg_header *hdr, int *skip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5175) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5176) 	struct ceph_msg *msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5177) 	int type = (int) le16_to_cpu(hdr->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5178) 	int front_len = (int) le32_to_cpu(hdr->front_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5180) 	if (con->in_msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5181) 		return con->in_msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5182) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5183) 	*skip = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5184) 	msg = ceph_msg_new(type, front_len, GFP_NOFS, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5185) 	if (!msg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5186) 		pr_err("unable to allocate msg type %d len %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5187) 		       type, front_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5188) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5189) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5191) 	return msg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5192) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5194) static int mds_sign_message(struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5195) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5196)        struct ceph_mds_session *s = msg->con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5197)        struct ceph_auth_handshake *auth = &s->s_auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5199)        return ceph_auth_sign_message(auth, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5200) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5201) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5202) static int mds_check_message_signature(struct ceph_msg *msg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5203) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5204)        struct ceph_mds_session *s = msg->con->private;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5205)        struct ceph_auth_handshake *auth = &s->s_auth;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5207)        return ceph_auth_check_message_signature(auth, msg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5210) static const struct ceph_connection_operations mds_con_ops = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5211) 	.get = con_get,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5212) 	.put = con_put,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5213) 	.dispatch = dispatch,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5214) 	.get_authorizer = get_authorizer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5215) 	.add_authorizer_challenge = add_authorizer_challenge,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5216) 	.verify_authorizer_reply = verify_authorizer_reply,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5217) 	.invalidate_authorizer = invalidate_authorizer,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5218) 	.peer_reset = peer_reset,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5219) 	.alloc_msg = mds_alloc_msg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5220) 	.sign_message = mds_sign_message,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5221) 	.check_message_signature = mds_check_message_signature,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5222) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5224) /* eof */