Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3) #include <linux/ceph/ceph_debug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8) #include <linux/ceph/libceph.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9) #include <linux/ceph/osdmap.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10) #include <linux/ceph/decode.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11) #include <linux/crush/hash.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12) #include <linux/crush/mapper.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14) char *ceph_osdmap_state_str(char *str, int len, u32 state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16) 	if (!len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17) 		return str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19) 	if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20) 		snprintf(str, len, "exists, up");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21) 	else if (state & CEPH_OSD_EXISTS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22) 		snprintf(str, len, "exists");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23) 	else if (state & CEPH_OSD_UP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24) 		snprintf(str, len, "up");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26) 		snprintf(str, len, "doesn't exist");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) 	return str;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) /* maps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) 
/*
 * Number of significant bits in @t: the 1-based position of the highest
 * set bit, or 0 when @t is 0.
 */
static int calc_bits_of(unsigned int t)
{
	int bits;

	for (bits = 0; t != 0; t >>= 1)
		bits++;

	return bits;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44)  * the foo_mask is the smallest value 2^n-1 that is >= foo.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) static void calc_pg_masks(struct ceph_pg_pool_info *pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) 	pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) 	pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53)  * decode crush map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) static int crush_decode_uniform_bucket(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) 				       struct crush_bucket_uniform *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) 	dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59) 	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) 	b->item_weight = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) static int crush_decode_list_bucket(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) 				    struct crush_bucket_list *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) 	int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) 	dout("crush_decode_list_bucket %p to %p\n", *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) 	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) 	if (b->item_weights == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) 	b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) 	if (b->sum_weights == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) 	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 	for (j = 0; j < b->h.size; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) 		b->item_weights[j] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) 		b->sum_weights[j] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) static int crush_decode_tree_bucket(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) 				    struct crush_bucket_tree *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 	int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) 	dout("crush_decode_tree_bucket %p to %p\n", *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 	ceph_decode_8_safe(p, end, b->num_nodes, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) 	b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) 	if (b->node_weights == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) 	ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) 	for (j = 0; j < b->num_nodes; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) 		b->node_weights[j] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) static int crush_decode_straw_bucket(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 				     struct crush_bucket_straw *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) 	int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 	dout("crush_decode_straw_bucket %p to %p\n", *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) 	if (b->item_weights == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 	b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) 	if (b->straws == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 	for (j = 0; j < b->h.size; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) 		b->item_weights[j] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) 		b->straws[j] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) static int crush_decode_straw2_bucket(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) 				      struct crush_bucket_straw2 *b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) 	int j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) 	dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 	if (b->item_weights == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) 	ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) 	for (j = 0; j < b->h.size; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) 		b->item_weights[j] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) struct crush_name_node {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) 	struct rb_node cn_node;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) 	int cn_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 	char cn_name[];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) static struct crush_name_node *alloc_crush_name(size_t name_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) 	struct crush_name_node *cn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) 	cn = kmalloc(sizeof(*cn) + name_len + 1, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) 	if (!cn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) 	RB_CLEAR_NODE(&cn->cn_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) 	return cn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) static void free_crush_name(struct crush_name_node *cn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) 	WARN_ON(!RB_EMPTY_NODE(&cn->cn_node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) 	kfree(cn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) DEFINE_RB_FUNCS(crush_name, struct crush_name_node, cn_id, cn_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) static int decode_crush_names(void **p, void *end, struct rb_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) 	u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) 	ceph_decode_32_safe(p, end, n, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) 	while (n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) 		struct crush_name_node *cn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) 		int id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 		u32 name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 		ceph_decode_32_safe(p, end, id, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 		ceph_decode_32_safe(p, end, name_len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 		ceph_decode_need(p, end, name_len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) 		cn = alloc_crush_name(name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) 		if (!cn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 		cn->cn_id = id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 		memcpy(cn->cn_name, *p, name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) 		cn->cn_name[name_len] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 		*p += name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 		if (!__insert_crush_name(root, cn)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) 			free_crush_name(cn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 			return -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) void clear_crush_names(struct rb_root *root)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 	while (!RB_EMPTY_ROOT(root)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 		struct crush_name_node *cn =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 		    rb_entry(rb_first(root), struct crush_name_node, cn_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 		erase_crush_name(root, cn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 		free_crush_name(cn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) static struct crush_choose_arg_map *alloc_choose_arg_map(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 	struct crush_choose_arg_map *arg_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 	arg_map = kzalloc(sizeof(*arg_map), GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 	if (!arg_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 	RB_CLEAR_NODE(&arg_map->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 	return arg_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) static void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 	if (arg_map) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) 		int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) 		WARN_ON(!RB_EMPTY_NODE(&arg_map->node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 		for (i = 0; i < arg_map->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 			struct crush_choose_arg *arg = &arg_map->args[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 			for (j = 0; j < arg->weight_set_size; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) 				kfree(arg->weight_set[j].weights);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 			kfree(arg->weight_set);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 			kfree(arg->ids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 		kfree(arg_map->args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 		kfree(arg_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 		node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) void clear_choose_args(struct crush_map *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) 	while (!RB_EMPTY_ROOT(&c->choose_args)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) 		struct crush_choose_arg_map *arg_map =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) 		    rb_entry(rb_first(&c->choose_args),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 			     struct crush_choose_arg_map, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 		erase_choose_arg_map(&c->choose_args, arg_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 		free_choose_arg_map(arg_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) static u32 *decode_array_32_alloc(void **p, void *end, u32 *plen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 	u32 *a = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 	u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 	if (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 		u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 		a = kmalloc_array(len, sizeof(u32), GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 		if (!a) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) 			ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 			goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) 		ceph_decode_need(p, end, len * sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) 		for (i = 0; i < len; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) 			a[i] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 	*plen = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 	return a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 	ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 	kfree(a);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 	return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293)  * Assumes @arg is zero-initialized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) static int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) 	ceph_decode_32_safe(p, end, arg->weight_set_size, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 	if (arg->weight_set_size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) 		u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 		arg->weight_set = kmalloc_array(arg->weight_set_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) 						sizeof(*arg->weight_set),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 						GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) 		if (!arg->weight_set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) 		for (i = 0; i < arg->weight_set_size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 			struct crush_weight_set *w = &arg->weight_set[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 			w->weights = decode_array_32_alloc(p, end, &w->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 			if (IS_ERR(w->weights)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) 				ret = PTR_ERR(w->weights);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 				w->weights = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) 				return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	arg->ids = decode_array_32_alloc(p, end, &arg->ids_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 	if (IS_ERR(arg->ids)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 		ret = PTR_ERR(arg->ids);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 		arg->ids = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) static int decode_choose_args(void **p, void *end, struct crush_map *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	struct crush_choose_arg_map *arg_map = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	u32 num_choose_arg_maps, num_buckets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	ceph_decode_32_safe(p, end, num_choose_arg_maps, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 	while (num_choose_arg_maps--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 		arg_map = alloc_choose_arg_map();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 		if (!arg_map) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 			ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 			goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 		ceph_decode_64_safe(p, end, arg_map->choose_args_index,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 				    e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 		arg_map->size = c->max_buckets;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 		arg_map->args = kcalloc(arg_map->size, sizeof(*arg_map->args),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 					GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 		if (!arg_map->args) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 			ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 			goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 		ceph_decode_32_safe(p, end, num_buckets, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 		while (num_buckets--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 			struct crush_choose_arg *arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 			u32 bucket_index;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 			ceph_decode_32_safe(p, end, bucket_index, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 			if (bucket_index >= arg_map->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 				goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 			arg = &arg_map->args[bucket_index];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 			ret = decode_choose_arg(p, end, arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 			if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 				goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 			if (arg->ids_size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 			    arg->ids_size != c->buckets[bucket_index]->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 				goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 		insert_choose_arg_map(&c->choose_args, arg_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	ret = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) 	free_choose_arg_map(arg_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) static void crush_finalize(struct crush_map *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	__s32 b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	/* Space for the array of pointers to per-bucket workspace */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 	c->working_size = sizeof(struct crush_work) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 	    c->max_buckets * sizeof(struct crush_work_bucket *);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 	for (b = 0; b < c->max_buckets; b++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) 		if (!c->buckets[b])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 		switch (c->buckets[b]->alg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 			 * The base case, permutation variables and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 			 * the pointer to the permutation array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 			c->working_size += sizeof(struct crush_work_bucket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 		/* Every bucket has a permutation array. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) 		c->working_size += c->buckets[b]->size * sizeof(__u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) static struct crush_map *crush_decode(void *pbyval, void *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	struct crush_map *c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 	void **p = &pbyval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	void *start = pbyval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) 	u32 magic;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	c = kzalloc(sizeof(*c), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	if (c == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 	c->type_names = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) 	c->names = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) 	c->choose_args = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434)         /* set tunables to default values */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435)         c->choose_local_tries = 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436)         c->choose_local_fallback_tries = 5;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437)         c->choose_total_tries = 19;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 	c->chooseleaf_descend_once = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 	ceph_decode_need(p, end, 4*sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	magic = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 	if (magic != CRUSH_MAGIC) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 		pr_err("crush_decode magic %x != current %x\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 		       (unsigned int)magic, (unsigned int)CRUSH_MAGIC);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 	c->max_buckets = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 	c->max_rules = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 	c->max_devices = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 	if (c->buckets == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 		goto badmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 	if (c->rules == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 		goto badmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 	/* buckets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 	for (i = 0; i < c->max_buckets; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 		int size = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 		u32 alg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 		struct crush_bucket *b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) 		ceph_decode_32_safe(p, end, alg, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 		if (alg == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 			c->buckets[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 		dout("crush_decode bucket %d off %x %p to %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 		     i, (int)(*p-start), *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 		switch (alg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 		case CRUSH_BUCKET_UNIFORM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 			size = sizeof(struct crush_bucket_uniform);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 		case CRUSH_BUCKET_LIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 			size = sizeof(struct crush_bucket_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) 		case CRUSH_BUCKET_TREE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 			size = sizeof(struct crush_bucket_tree);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 		case CRUSH_BUCKET_STRAW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 			size = sizeof(struct crush_bucket_straw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 		case CRUSH_BUCKET_STRAW2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 			size = sizeof(struct crush_bucket_straw2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 		default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 		BUG_ON(size == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 		b = c->buckets[i] = kzalloc(size, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 		if (b == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 			goto badmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) 		ceph_decode_need(p, end, 4*sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 		b->id = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 		b->type = ceph_decode_16(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) 		b->alg = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 		b->hash = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) 		b->weight = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) 		b->size = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) 		dout("crush_decode bucket size %d off %x %p to %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) 		     b->size, (int)(*p-start), *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) 		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) 		if (b->items == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) 			goto badmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) 		ceph_decode_need(p, end, b->size*sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) 		for (j = 0; j < b->size; j++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) 			b->items[j] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 		switch (b->alg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 		case CRUSH_BUCKET_UNIFORM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 			err = crush_decode_uniform_bucket(p, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 				  (struct crush_bucket_uniform *)b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 			if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 				goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 		case CRUSH_BUCKET_LIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 			err = crush_decode_list_bucket(p, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 			       (struct crush_bucket_list *)b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 			if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 				goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 		case CRUSH_BUCKET_TREE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 			err = crush_decode_tree_bucket(p, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 				(struct crush_bucket_tree *)b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531) 			if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 				goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 		case CRUSH_BUCKET_STRAW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 			err = crush_decode_straw_bucket(p, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 				(struct crush_bucket_straw *)b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 			if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 				goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 		case CRUSH_BUCKET_STRAW2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 			err = crush_decode_straw2_bucket(p, end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 				(struct crush_bucket_straw2 *)b);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 			if (err < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 				goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 	/* rules */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 	dout("rule vec is %p\n", c->rules);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 	for (i = 0; i < c->max_rules; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 		u32 yes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 		struct crush_rule *r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 		ceph_decode_32_safe(p, end, yes, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 		if (!yes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 			dout("crush_decode NO rule %d off %x %p to %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 			     i, (int)(*p-start), *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 			c->rules[i] = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) 		dout("crush_decode rule %d off %x %p to %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 		     i, (int)(*p-start), *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) 		/* len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) 		ceph_decode_32_safe(p, end, yes, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) #if BITS_PER_LONG == 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569) 		if (yes > (ULONG_MAX - sizeof(*r))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570) 			  / sizeof(struct crush_rule_step))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573) 		r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574) 		c->rules[i] = r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 		if (r == NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) 			goto badmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577) 		dout(" rule %d is at %p\n", i, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578) 		r->len = yes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579) 		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580) 		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581) 		for (j = 0; j < r->len; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 			r->steps[j].op = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583) 			r->steps[j].arg1 = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 			r->steps[j].arg2 = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588) 	err = decode_crush_names(p, end, &c->type_names);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590) 		goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592) 	err = decode_crush_names(p, end, &c->names);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) 		goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) 	ceph_decode_skip_map(p, end, 32, string, bad); /* rule_name_map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598)         /* tunables */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599)         ceph_decode_need(p, end, 3*sizeof(u32), done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600)         c->choose_local_tries = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601)         c->choose_local_fallback_tries =  ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602)         c->choose_total_tries = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603)         dout("crush decode tunable choose_local_tries = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604)              c->choose_local_tries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605)         dout("crush decode tunable choose_local_fallback_tries = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606)              c->choose_local_fallback_tries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607)         dout("crush decode tunable choose_total_tries = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608)              c->choose_total_tries);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 	ceph_decode_need(p, end, sizeof(u32), done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 	c->chooseleaf_descend_once = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	dout("crush decode tunable chooseleaf_descend_once = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 	     c->chooseleaf_descend_once);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 	ceph_decode_need(p, end, sizeof(u8), done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 	c->chooseleaf_vary_r = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 	dout("crush decode tunable chooseleaf_vary_r = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 	     c->chooseleaf_vary_r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	/* skip straw_calc_version, allowed_bucket_algs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) 	ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 	*p += sizeof(u8) + sizeof(u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 	ceph_decode_need(p, end, sizeof(u8), done);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	c->chooseleaf_stable = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	dout("crush decode tunable chooseleaf_stable = %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 	     c->chooseleaf_stable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	if (*p != end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 		/* class_map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) 		ceph_decode_skip_map(p, end, 32, 32, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 		/* class_name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) 		ceph_decode_skip_map(p, end, 32, string, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 		/* class_bucket */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) 		ceph_decode_skip_map_of_map(p, end, 32, 32, 32, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) 	if (*p != end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) 		err = decode_choose_args(p, end, c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 			goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 	crush_finalize(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	dout("crush_decode success\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	return c;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) badmem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 	err = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) fail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	dout("crush_decode fail %d\n", err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 	crush_destroy(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 	err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) 	goto fail;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 	if (lhs->pool < rhs->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	if (lhs->pool > rhs->pool)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 	if (lhs->seed < rhs->seed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 	if (lhs->seed > rhs->seed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 	ret = ceph_pg_compare(&lhs->pgid, &rhs->pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 	if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 		return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 	if (lhs->shard < rhs->shard)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 	if (lhs->shard > rhs->shard)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) static struct ceph_pg_mapping *alloc_pg_mapping(size_t payload_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 	struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	pg = kmalloc(sizeof(*pg) + payload_len, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 	if (!pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 	RB_CLEAR_NODE(&pg->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 	return pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) static void free_pg_mapping(struct ceph_pg_mapping *pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 	WARN_ON(!RB_EMPTY_NODE(&pg->node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 	kfree(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711)  * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712)  * to a set of osds) and primary_temp (explicit primary setting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) DEFINE_RB_FUNCS2(pg_mapping, struct ceph_pg_mapping, pgid, ceph_pg_compare,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 		 RB_BYPTR, const struct ceph_pg *, node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718)  * rbtree of pg pool info
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) DEFINE_RB_FUNCS(pg_pool, struct ceph_pg_pool_info, id, node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 	return lookup_pg_pool(&map->pg_pools, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 	struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	if (id == CEPH_NOPOOL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 	if (WARN_ON_ONCE(id > (u64) INT_MAX))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 	pi = lookup_pg_pool(&map->pg_pools, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 	return pi ? pi->name : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) EXPORT_SYMBOL(ceph_pg_pool_name_by_id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	struct rb_node *rbp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 		struct ceph_pg_pool_info *pi =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 			rb_entry(rbp, struct ceph_pg_pool_info, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 		if (pi->name && strcmp(pi->name, name) == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 			return pi->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 	return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) EXPORT_SYMBOL(ceph_pg_poolid_by_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	pi = lookup_pg_pool(&map->pg_pools, id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	return pi ? pi->flags : 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) EXPORT_SYMBOL(ceph_pg_pool_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	erase_pg_pool(root, pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	kfree(pi->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	kfree(pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	u8 ev, cv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	unsigned len, num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 	void *pool_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 	ceph_decode_need(p, end, 2 + 4, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) 	ev = ceph_decode_8(p);  /* encoding version */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	cv = ceph_decode_8(p); /* compat version */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) 	if (ev < 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 		pr_warn("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) 	if (cv > 9) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) 		pr_warn("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 	len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 	ceph_decode_need(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	pool_end = *p + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 	pi->type = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 	pi->size = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 	pi->crush_ruleset = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	pi->object_hash = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 	pi->pg_num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	pi->pgp_num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	*p += 4 + 4;  /* skip lpg* */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 	*p += 4;      /* skip last_change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	*p += 8 + 4;  /* skip snap_seq, snap_epoch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	/* skip snaps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 	while (num--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 		*p += 8;  /* snapid key */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		*p += 1 + 1; /* versions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 		*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 	/* skip removed_snaps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	*p += num * (8 + 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	*p += 8;  /* skip auid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 	pi->flags = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 	*p += 4;  /* skip crash_replay_interval */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 	if (ev >= 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 		pi->min_size = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 		pi->min_size = pi->size - pi->size / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	if (ev >= 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 		*p += 8 + 8;  /* skip quota_max_* */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 	if (ev >= 9) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 		/* skip tiers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 		num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 		*p += num * 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		*p += 8;  /* skip tier_of */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 		*p += 1;  /* skip cache_mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 		pi->read_tier = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 		pi->write_tier = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 		pi->read_tier = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		pi->write_tier = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	if (ev >= 10) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		/* skip properties */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 		num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 		while (num--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 			len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 			*p += len; /* key */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 			len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 			*p += len; /* val */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) 	if (ev >= 11) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 		/* skip hit_set_params */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 		*p += 1 + 1; /* versions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) 		len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 		*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 		*p += 4; /* skip hit_set_period */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 		*p += 4; /* skip hit_set_count */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	if (ev >= 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 		*p += 4; /* skip stripe_width */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	if (ev >= 13) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 		*p += 8; /* skip target_max_bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 		*p += 8; /* skip target_max_objects */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		*p += 4; /* skip cache_target_dirty_ratio_micro */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 		*p += 4; /* skip cache_target_full_ratio_micro */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 		*p += 4; /* skip cache_min_flush_age */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 		*p += 4; /* skip cache_min_evict_age */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	if (ev >=  14) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		/* skip erasure_code_profile */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 		len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 		*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 	 * last_force_op_resend_preluminous, will be overridden if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	 * map was encoded with RESEND_ON_SPLIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	if (ev >= 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 		pi->last_force_request_resend = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		pi->last_force_request_resend = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	if (ev >= 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 		*p += 4; /* skip min_read_recency_for_promote */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 	if (ev >= 17)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		*p += 8; /* skip expected_num_objects */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	if (ev >= 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 		*p += 4; /* skip cache_target_dirty_high_ratio_micro */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	if (ev >= 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 		*p += 4; /* skip min_write_recency_for_promote */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	if (ev >= 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 		*p += 1; /* skip use_gmt_hitset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	if (ev >= 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 		*p += 1; /* skip fast_read */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	if (ev >= 23) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		*p += 4; /* skip hit_set_grade_decay_rate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		*p += 4; /* skip hit_set_search_last_n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 	if (ev >= 24) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		/* skip opts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 		*p += 1 + 1; /* versions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 		len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 		*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	if (ev >= 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 		pi->last_force_request_resend = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 	/* ignore the rest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	*p = pool_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	calc_pg_masks(pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	u32 num, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	u64 pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	ceph_decode_32_safe(p, end, num, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	dout(" %d pool names\n", num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	while (num--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		ceph_decode_64_safe(p, end, pool, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 		ceph_decode_32_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 		dout("  pool %llu len %d\n", pool, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 		ceph_decode_need(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) 		pi = lookup_pg_pool(&map->pg_pools, pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 		if (pi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 			char *name = kstrndup(*p, len, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 			if (!name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 				return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 			kfree(pi->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 			pi->name = name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 			dout("  name is %s\n", pi->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968)  * CRUSH workspaces
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970)  * workspace_manager framework borrowed from fs/btrfs/compression.c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971)  * Two simplifications: there is only one type of workspace and there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972)  * is always at least one workspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) static struct crush_work *alloc_workspace(const struct crush_map *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	size_t work_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	WARN_ON(!c->working_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 	work_size = crush_work_size(c, CEPH_PG_MAX_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 	dout("%s work_size %zu bytes\n", __func__, work_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	work = ceph_kvmalloc(work_size, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	if (!work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 	INIT_LIST_HEAD(&work->item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 	crush_init_workspace(c, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	return work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) static void free_workspace(struct crush_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 	WARN_ON(!list_empty(&work->item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 	kvfree(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) static void init_workspace_manager(struct workspace_manager *wsm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	INIT_LIST_HEAD(&wsm->idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	spin_lock_init(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	atomic_set(&wsm->total_ws, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 	wsm->free_ws = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	init_waitqueue_head(&wsm->ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) static void add_initial_workspace(struct workspace_manager *wsm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 				  struct crush_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	WARN_ON(!list_empty(&wsm->idle_ws));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	list_add(&work->item, &wsm->idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	atomic_set(&wsm->total_ws, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	wsm->free_ws = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) static void cleanup_workspace_manager(struct workspace_manager *wsm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 	struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	while (!list_empty(&wsm->idle_ws)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 		work = list_first_entry(&wsm->idle_ws, struct crush_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 					item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 		list_del_init(&work->item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 		free_workspace(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	atomic_set(&wsm->total_ws, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	wsm->free_ws = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)  * Finds an available workspace or allocates a new one.  If it's not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)  * possible to allocate a new one, waits until there is one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) static struct crush_work *get_workspace(struct workspace_manager *wsm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 					const struct crush_map *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	int cpus = num_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	spin_lock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	if (!list_empty(&wsm->idle_ws)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 		work = list_first_entry(&wsm->idle_ws, struct crush_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 					item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 		list_del_init(&work->item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 		wsm->free_ws--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 		spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		return work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	if (atomic_read(&wsm->total_ws) > cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 		DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 		prepare_to_wait(&wsm->ws_wait, &wait, TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		if (atomic_read(&wsm->total_ws) > cpus && !wsm->free_ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 			schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		finish_wait(&wsm->ws_wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 		goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	atomic_inc(&wsm->total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	work = alloc_workspace(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	if (!work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 		atomic_dec(&wsm->total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 		wake_up(&wsm->ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 		 * Do not return the error but go back to waiting.  We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 		 * have the inital workspace and the CRUSH computation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 		 * time is bounded so we will get it eventually.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 		WARN_ON(atomic_read(&wsm->total_ws) < 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 		goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	return work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)  * Puts a workspace back on the list or frees it if we have enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)  * idle ones sitting around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) static void put_workspace(struct workspace_manager *wsm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 			  struct crush_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	spin_lock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	if (wsm->free_ws <= num_online_cpus()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 		list_add(&work->item, &wsm->idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 		wsm->free_ws++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 		spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 		goto wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	free_workspace(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	atomic_dec(&wsm->total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) wake:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	if (wq_has_sleeper(&wsm->ws_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 		wake_up(&wsm->ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105)  * osd map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) struct ceph_osdmap *ceph_osdmap_alloc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	struct ceph_osdmap *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	map = kzalloc(sizeof(*map), GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	if (!map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	map->pg_pools = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	map->pool_max = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	map->pg_temp = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	map->primary_temp = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	map->pg_upmap = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	map->pg_upmap_items = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	init_workspace_manager(&map->crush_wsm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	return map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) void ceph_osdmap_destroy(struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	dout("osdmap_destroy %p\n", map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	if (map->crush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 		crush_destroy(map->crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	cleanup_workspace_manager(&map->crush_wsm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	while (!RB_EMPTY_ROOT(&map->pg_temp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 		struct ceph_pg_mapping *pg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 			rb_entry(rb_first(&map->pg_temp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 				 struct ceph_pg_mapping, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 		erase_pg_mapping(&map->pg_temp, pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 		free_pg_mapping(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	while (!RB_EMPTY_ROOT(&map->primary_temp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 		struct ceph_pg_mapping *pg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 			rb_entry(rb_first(&map->primary_temp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 				 struct ceph_pg_mapping, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 		erase_pg_mapping(&map->primary_temp, pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 		free_pg_mapping(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 		struct ceph_pg_mapping *pg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 			rb_entry(rb_first(&map->pg_upmap),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 				 struct ceph_pg_mapping, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 		rb_erase(&pg->node, &map->pg_upmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 		kfree(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 		struct ceph_pg_mapping *pg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 			rb_entry(rb_first(&map->pg_upmap_items),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 				 struct ceph_pg_mapping, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 		rb_erase(&pg->node, &map->pg_upmap_items);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 		kfree(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	while (!RB_EMPTY_ROOT(&map->pg_pools)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 		struct ceph_pg_pool_info *pi =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 			rb_entry(rb_first(&map->pg_pools),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 				 struct ceph_pg_pool_info, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 		__remove_pg_pool(&map->pg_pools, pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	kvfree(map->osd_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	kvfree(map->osd_weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	kvfree(map->osd_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	kvfree(map->osd_primary_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	kfree(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)  * Adjust max_osd value, (re)allocate arrays.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179)  * The new elements are properly initialized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	u32 *state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	u32 *weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	struct ceph_entity_addr *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	u32 to_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	dout("%s old %u new %u\n", __func__, map->max_osd, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	if (max == map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	if (!state || !weight || !addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 		kvfree(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 		kvfree(weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 		kvfree(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	to_copy = min(map->max_osd, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	if (map->osd_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 		memcpy(state, map->osd_state, to_copy * sizeof(*state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 		memcpy(weight, map->osd_weight, to_copy * sizeof(*weight));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 		memcpy(addr, map->osd_addr, to_copy * sizeof(*addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 		kvfree(map->osd_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 		kvfree(map->osd_weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 		kvfree(map->osd_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 	map->osd_state = state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	map->osd_weight = weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	map->osd_addr = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	for (i = map->max_osd; i < max; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 		map->osd_state[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 		map->osd_weight[i] = CEPH_OSD_OUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 		memset(map->osd_addr + i, 0, sizeof(*map->osd_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	if (map->osd_primary_affinity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 		u32 *affinity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 		affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 					 GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 		if (!affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 		memcpy(affinity, map->osd_primary_affinity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 		       to_copy * sizeof(*affinity));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 		kvfree(map->osd_primary_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 		map->osd_primary_affinity = affinity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 		for (i = map->max_osd; i < max; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 			map->osd_primary_affinity[i] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 			    CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	map->max_osd = max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	if (IS_ERR(crush))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 		return PTR_ERR(crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	work = alloc_workspace(crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	if (!work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 		crush_destroy(crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	if (map->crush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 		crush_destroy(map->crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	cleanup_workspace_manager(&map->crush_wsm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	map->crush = crush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	add_initial_workspace(&map->crush_wsm, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) #define OSDMAP_WRAPPER_COMPAT_VER	7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) #define OSDMAP_CLIENT_DATA_COMPAT_VER	1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)  * Return 0 or error.  On success, *v is set to 0 for old (v6) osdmaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271)  * to struct_v of the client_data section for new (v7 and above)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272)  * osdmaps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) static int get_osdmap_client_data_v(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 				    const char *prefix, u8 *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	u8 struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	ceph_decode_8_safe(p, end, struct_v, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	if (struct_v >= 7) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 		u8 struct_compat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		ceph_decode_8_safe(p, end, struct_compat, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 		if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 			pr_warn("got v %d cv %d > %d of %s ceph_osdmap\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 				struct_v, struct_compat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 				OSDMAP_WRAPPER_COMPAT_VER, prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 		*p += 4; /* ignore wrapper struct_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		ceph_decode_8_safe(p, end, struct_v, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 		ceph_decode_8_safe(p, end, struct_compat, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 		if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 			pr_warn("got v %d cv %d > %d of %s ceph_osdmap client data\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 				struct_v, struct_compat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 				OSDMAP_CLIENT_DATA_COMPAT_VER, prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 		*p += 4; /* ignore client data struct_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 		u16 version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		*p -= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 		ceph_decode_16_safe(p, end, version, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 		if (version < 6) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 			pr_warn("got v %d < 6 of %s ceph_osdmap\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 				version, prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 		/* old osdmap enconding */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 		struct_v = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	*v = struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 			  bool incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	ceph_decode_32_safe(p, end, n, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	while (n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 		struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 		u64 pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 		ceph_decode_64_safe(p, end, pool, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 		pi = lookup_pg_pool(&map->pg_pools, pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 		if (!incremental || !pi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 			pi = kzalloc(sizeof(*pi), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 			if (!pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 				return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 			RB_CLEAR_NODE(&pi->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 			pi->id = pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 			if (!__insert_pg_pool(&map->pg_pools, pi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 				kfree(pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 				return -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 		ret = decode_pool(p, end, pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) static int decode_pools(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	return __decode_pools(p, end, map, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	return __decode_pools(p, end, map, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) typedef struct ceph_pg_mapping *(*decode_mapping_fn_t)(void **, void *, bool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) static int decode_pg_mapping(void **p, void *end, struct rb_root *mapping_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 			     decode_mapping_fn_t fn, bool incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 	u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 	WARN_ON(!incremental && !fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	ceph_decode_32_safe(p, end, n, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	while (n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 		struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 		struct ceph_pg pgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 		ret = ceph_decode_pgid(p, end, &pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 		pg = lookup_pg_mapping(mapping_root, &pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 		if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 			WARN_ON(!incremental);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 			erase_pg_mapping(mapping_root, pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 			free_pg_mapping(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 		if (fn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 			pg = fn(p, end, incremental);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 			if (IS_ERR(pg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 				return PTR_ERR(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 			if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 				pg->pgid = pgid; /* struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 				insert_pg_mapping(mapping_root, pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) static struct ceph_pg_mapping *__decode_pg_temp(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 						bool incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 	u32 len, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 	if (len == 0 && incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 		return NULL;	/* new_pg_temp: [] to remove */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 	if (len > (SIZE_MAX - sizeof(*pg)) / sizeof(u32))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 	ceph_decode_need(p, end, len * sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	pg = alloc_pg_mapping(len * sizeof(u32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	if (!pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 	pg->pg_temp.len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 	for (i = 0; i < len; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 		pg->pg_temp.osds[i] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 	return pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 	return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 				 false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 				 true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) static struct ceph_pg_mapping *__decode_primary_temp(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 						     bool incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 	struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	u32 osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	ceph_decode_32_safe(p, end, osd, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	if (osd == (u32)-1 && incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 		return NULL;	/* new_primary_temp: -1 to remove */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 	pg = alloc_pg_mapping(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 	if (!pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	pg->primary_temp.osd = osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	return pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 	return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	return decode_pg_mapping(p, end, &map->primary_temp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 				 __decode_primary_temp, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) static int decode_new_primary_temp(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 				   struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	return decode_pg_mapping(p, end, &map->primary_temp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 				 __decode_primary_temp, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	if (!map->osd_primary_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 		return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	return map->osd_primary_affinity[osd];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 	if (!map->osd_primary_affinity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 		int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 		map->osd_primary_affinity = ceph_kvmalloc(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 		    array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 		    GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 		if (!map->osd_primary_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 		for (i = 0; i < map->max_osd; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 			map->osd_primary_affinity[i] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 			    CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	map->osd_primary_affinity[osd] = aff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) static int decode_primary_affinity(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 				   struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 	u32 len, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 	if (len == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 		kvfree(map->osd_primary_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 		map->osd_primary_affinity = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 	if (len != map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) 		goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) 	ceph_decode_need(p, end, map->max_osd*sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	for (i = 0; i < map->max_osd; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) 		ret = set_primary_affinity(map, i, ceph_decode_32(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) static int decode_new_primary_affinity(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 				       struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 	u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 	ceph_decode_32_safe(p, end, n, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 	while (n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 		u32 osd, aff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		ceph_decode_32_safe(p, end, osd, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 		ceph_decode_32_safe(p, end, aff, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 		ret = set_primary_affinity(map, osd, aff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) 		pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) 						 bool __unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) 	return __decode_pg_temp(p, end, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) 	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 				 false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) 	return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) 				 true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 	return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 						       bool __unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 	struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 	u32 len, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) 	if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) 		return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) 	pg = alloc_pg_mapping(2 * len * sizeof(u32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 	if (!pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 	pg->pg_upmap_items.len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) 	for (i = 0; i < len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 		pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 		pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 	return pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) 	return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) 	return decode_pg_mapping(p, end, &map->pg_upmap_items,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 				 __decode_pg_upmap_items, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) static int decode_new_pg_upmap_items(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 				     struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 	return decode_pg_mapping(p, end, &map->pg_upmap_items,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 				 __decode_pg_upmap_items, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) static int decode_old_pg_upmap_items(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) 				     struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 	return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648)  * decode a full map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 	u8 struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) 	u32 epoch = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 	void *start = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 	u32 max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 	u32 len, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 	dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 	err = get_osdmap_client_data_v(p, end, "full", &struct_v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) 	/* fsid, epoch, created, modified */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) 	ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) 			 sizeof(map->created) + sizeof(map->modified), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) 	ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) 	epoch = map->epoch = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) 	ceph_decode_copy(p, &map->created, sizeof(map->created));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) 	ceph_decode_copy(p, &map->modified, sizeof(map->modified));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) 	/* pools */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) 	err = decode_pools(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	/* pool_name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 	err = decode_pool_names(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 	ceph_decode_32_safe(p, end, map->pool_max, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 	ceph_decode_32_safe(p, end, map->flags, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 	/* max_osd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 	ceph_decode_32_safe(p, end, max, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 	/* (re)alloc osd arrays */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 	err = osdmap_set_max_osd(map, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 	/* osd_state, osd_weight, osd_addrs->client_addr */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 	ceph_decode_need(p, end, 3*sizeof(u32) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 			 map->max_osd*(struct_v >= 5 ? sizeof(u32) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 						       sizeof(u8)) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 				       sizeof(*map->osd_weight), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 	if (ceph_decode_32(p) != map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 		goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 	if (struct_v >= 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 		for (i = 0; i < map->max_osd; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 			map->osd_state[i] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 		for (i = 0; i < map->max_osd; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 			map->osd_state[i] = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 	if (ceph_decode_32(p) != map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 		goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 	for (i = 0; i < map->max_osd; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 		map->osd_weight[i] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	if (ceph_decode_32(p) != map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 		goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	for (i = 0; i < map->max_osd; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 		err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 	/* pg_temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	err = decode_pg_temp(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	/* primary_temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	if (struct_v >= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 		err = decode_primary_temp(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 	/* primary_affinity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 	if (struct_v >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 		err = decode_primary_affinity(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 		WARN_ON(map->osd_primary_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 	/* crush */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 	err = osdmap_set_crush(map, crush_decode(*p, min(*p + len, end)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 	if (struct_v >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 		/* erasure_code_profiles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 		ceph_decode_skip_map_of_map(p, end, string, string, string,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 					    e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 	if (struct_v >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 		err = decode_pg_upmap(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 		err = decode_pg_upmap_items(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 		WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 	/* ignore the rest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 	*p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 	pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 	       err, epoch, (int)(*p - start), *p, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 	print_hex_dump(KERN_DEBUG, "osdmap: ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 		       DUMP_PREFIX_OFFSET, 16, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 		       start, end - start, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 	return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791)  * Allocate and decode a full map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) 	struct ceph_osdmap *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 	map = ceph_osdmap_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	if (!map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 		return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 	ret = osdmap_decode(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 	if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 		ceph_osdmap_destroy(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 		return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 	return map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812)  * Encoding order is (new_up_client, new_state, new_weight).  Need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813)  * apply in the (new_weight, new_state, new_up_client) order, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814)  * an incremental map may look like e.g.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816)  *     new_up_client: { osd=6, addr=... } # set osd_state and addr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817)  *     new_state: { osd=6, xorstate=EXISTS } # clear osd_state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 				      struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 	void *new_up_client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	void *new_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	void *new_weight_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 	new_up_client = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 	for (i = 0; i < len; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 		struct ceph_entity_addr addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 		ceph_decode_skip_32(p, end, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 		if (ceph_decode_entity_addr(p, end, &addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 			goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 	new_state = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 	len *= sizeof(u32) + (struct_v >= 5 ? sizeof(u32) : sizeof(u8));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 	ceph_decode_need(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 	*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	/* new_weight */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 		s32 osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 		u32 w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 		ceph_decode_need(p, end, 2*sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 		osd = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 		w = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 		BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 		pr_info("osd%d weight 0x%x %s\n", osd, w,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 		     w == CEPH_OSD_IN ? "(in)" :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 		     (w == CEPH_OSD_OUT ? "(out)" : ""));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 		map->osd_weight[osd] = w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 		 * If we are marking in, set the EXISTS, and clear the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 		 * AUTOOUT and NEW bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 		if (w) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 			map->osd_state[osd] |= CEPH_OSD_EXISTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 			map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 						 CEPH_OSD_NEW);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 	new_weight_end = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 	/* new_state (up/down) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 	*p = new_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 	while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 		s32 osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 		u32 xorstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 		int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 		osd = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 		if (struct_v >= 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 			xorstate = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 			xorstate = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 		if (xorstate == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 			xorstate = CEPH_OSD_UP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 		BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 		if ((map->osd_state[osd] & CEPH_OSD_UP) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 		    (xorstate & CEPH_OSD_UP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 			pr_info("osd%d down\n", osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 		if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 		    (xorstate & CEPH_OSD_EXISTS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 			pr_info("osd%d does not exist\n", osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 			ret = set_primary_affinity(map, osd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 						   CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 			if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 				return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 			memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 			map->osd_state[osd] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 			map->osd_state[osd] ^= xorstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 	/* new_up_client */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 	*p = new_up_client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 	len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 		s32 osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 		struct ceph_entity_addr addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 		osd = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 		BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 		if (ceph_decode_entity_addr(p, end, &addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 			goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 		pr_info("osd%d up\n", osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 		map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 		map->osd_addr[osd] = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 	*p = new_weight_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 	return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928)  * decode and apply an incremental map update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 					     struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 	struct ceph_fsid fsid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 	u32 epoch = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 	struct ceph_timespec modified;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 	s32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 	u64 pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	__s64 new_pool_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 	__s32 new_flags, max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 	void *start = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 	u8 struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 	dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 	err = get_osdmap_client_data_v(p, end, "inc", &struct_v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 	/* fsid, epoch, modified, new_pool_max, new_flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 			 sizeof(u64) + sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 	ceph_decode_copy(p, &fsid, sizeof(fsid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 	epoch = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	BUG_ON(epoch != map->epoch+1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 	ceph_decode_copy(p, &modified, sizeof(modified));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	new_pool_max = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 	new_flags = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 	/* full map? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 	if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 		dout("apply_incremental full map len %d, %p to %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 		     len, *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 		return ceph_osdmap_decode(p, min(*p+len, end));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 	/* new crush? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 	if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) 		err = osdmap_set_crush(map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) 				       crush_decode(*p, min(*p + len, end)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 		*p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 	/* new flags? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 	if (new_flags >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 		map->flags = new_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 	if (new_pool_max >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 		map->pool_max = new_pool_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 	/* new max? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 	ceph_decode_32_safe(p, end, max, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 	if (max >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 		err = osdmap_set_max_osd(map, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 	map->epoch++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 	map->modified = modified;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	/* new_pools */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 	err = decode_new_pools(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 	/* new_pool_names */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 	err = decode_pool_names(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	/* old_pool */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 	ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 	while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 		struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 		ceph_decode_64_safe(p, end, pool, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 		pi = lookup_pg_pool(&map->pg_pools, pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 		if (pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 			__remove_pg_pool(&map->pg_pools, pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 	/* new_up_client, new_state, new_weight */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 	err = decode_new_up_state_weight(p, end, struct_v, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 	/* new_pg_temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 	err = decode_new_pg_temp(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 	if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 		goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 	/* new_primary_temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 	if (struct_v >= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 		err = decode_new_primary_temp(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	/* new_primary_affinity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 	if (struct_v >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 		err = decode_new_primary_affinity(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 	if (struct_v >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) 		/* new_erasure_code_profiles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 		ceph_decode_skip_map_of_map(p, end, string, string, string,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 					    e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 		/* old_erasure_code_profiles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 		ceph_decode_skip_set(p, end, string, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 	if (struct_v >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 		err = decode_new_pg_upmap(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 		err = decode_old_pg_upmap(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 		err = decode_new_pg_upmap_items(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 		err = decode_old_pg_upmap_items(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 			goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 	/* ignore the rest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 	*p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 	dout("inc osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 	return map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 	err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	pr_err("corrupt inc osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	       err, epoch, (int)(*p - start), *p, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) 	print_hex_dump(KERN_DEBUG, "osdmap: ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 		       DUMP_PREFIX_OFFSET, 16, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 		       start, end - start, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 	return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) void ceph_oloc_copy(struct ceph_object_locator *dest,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 		    const struct ceph_object_locator *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 	ceph_oloc_destroy(dest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 	dest->pool = src->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 	if (src->pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 		dest->pool_ns = ceph_get_string(src->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 		dest->pool_ns = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) EXPORT_SYMBOL(ceph_oloc_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) void ceph_oloc_destroy(struct ceph_object_locator *oloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 	ceph_put_string(oloc->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) EXPORT_SYMBOL(ceph_oloc_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) void ceph_oid_copy(struct ceph_object_id *dest,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 		   const struct ceph_object_id *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 	ceph_oid_destroy(dest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	if (src->name != src->inline_name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 		/* very rare, see ceph_object_id definition */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 		dest->name = kmalloc(src->name_len + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 				     GFP_NOIO | __GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 		dest->name = dest->inline_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 	memcpy(dest->name, src->name, src->name_len + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 	dest->name_len = src->name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) EXPORT_SYMBOL(ceph_oid_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) static __printf(2, 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) int oid_printf_vargs(struct ceph_object_id *oid, const char *fmt, va_list ap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 	WARN_ON(!ceph_oid_empty(oid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 	len = vsnprintf(oid->inline_name, sizeof(oid->inline_name), fmt, ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 	if (len >= sizeof(oid->inline_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 		return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 	oid->name_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135)  * If oid doesn't fit into inline buffer, BUG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) void ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 	va_list ap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 	va_start(ap, fmt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 	BUG_ON(oid_printf_vargs(oid, fmt, ap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) 	va_end(ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) EXPORT_SYMBOL(ceph_oid_printf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) static __printf(3, 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) int oid_aprintf_vargs(struct ceph_object_id *oid, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 		      const char *fmt, va_list ap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 	va_list aq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) 	int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 	va_copy(aq, ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 	len = oid_printf_vargs(oid, fmt, aq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 	va_end(aq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 	if (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 		char *external_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 		external_name = kmalloc(len + 1, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 		if (!external_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 		oid->name = external_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 		WARN_ON(vsnprintf(oid->name, len + 1, fmt, ap) != len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 		oid->name_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174)  * If oid doesn't fit into inline buffer, allocate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 		     const char *fmt, ...)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 	va_list ap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 	va_start(ap, fmt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) 	ret = oid_aprintf_vargs(oid, gfp, fmt, ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) 	va_end(ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) EXPORT_SYMBOL(ceph_oid_aprintf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) void ceph_oid_destroy(struct ceph_object_id *oid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 	if (oid->name != oid->inline_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 		kfree(oid->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) EXPORT_SYMBOL(ceph_oid_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198)  * osds only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) static bool __osds_equal(const struct ceph_osds *lhs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 			 const struct ceph_osds *rhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 	if (lhs->size == rhs->size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 	    !memcmp(lhs->osds, rhs->osds, rhs->size * sizeof(rhs->osds[0])))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211)  * osds + primary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) static bool osds_equal(const struct ceph_osds *lhs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 		       const struct ceph_osds *rhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 	if (__osds_equal(lhs, rhs) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 	    lhs->primary == rhs->primary)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) static bool osds_valid(const struct ceph_osds *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) 	/* non-empty set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) 	if (set->size > 0 && set->primary >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) 	/* empty can_shift_osds set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) 	if (!set->size && set->primary == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	/* empty !can_shift_osds set - all NONE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 	if (set->size > 0 && set->primary == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 		int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 		for (i = 0; i < set->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 			if (set->osds[i] != CRUSH_ITEM_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 		if (i == set->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 	memcpy(dest->osds, src->osds, src->size * sizeof(src->osds[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 	dest->size = src->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 	dest->primary = src->primary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 		      u32 new_pg_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 	int old_bits = calc_bits_of(old_pg_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 	int old_mask = (1 << old_bits) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 	int n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 	WARN_ON(pgid->seed >= old_pg_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 	if (new_pg_num <= old_pg_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 	for (n = 1; ; n++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 		int next_bit = n << (old_bits - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) 		u32 s = next_bit | pgid->seed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 		if (s < old_pg_num || s == pgid->seed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 		if (s >= new_pg_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 		s = ceph_stable_mod(s, old_pg_num, old_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 		if (s == pgid->seed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) bool ceph_is_new_interval(const struct ceph_osds *old_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 			  const struct ceph_osds *new_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 			  const struct ceph_osds *old_up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 			  const struct ceph_osds *new_up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 			  int old_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 			  int new_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 			  int old_min_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 			  int new_min_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 			  u32 old_pg_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 			  u32 new_pg_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 			  bool old_sort_bitwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 			  bool new_sort_bitwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 			  bool old_recovery_deletes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 			  bool new_recovery_deletes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 			  const struct ceph_pg *pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 	return !osds_equal(old_acting, new_acting) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 	       !osds_equal(old_up, new_up) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 	       old_size != new_size ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	       old_min_size != new_min_size ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	       ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	       old_sort_bitwise != new_sort_bitwise ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	       old_recovery_deletes != new_recovery_deletes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) static int calc_pg_rank(int osd, const struct ceph_osds *acting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 	for (i = 0; i < acting->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 		if (acting->osds[i] == osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 			return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 	return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) static bool primary_changed(const struct ceph_osds *old_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 			    const struct ceph_osds *new_acting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 	if (!old_acting->size && !new_acting->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 		return false; /* both still empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 	if (!old_acting->size ^ !new_acting->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 		return true; /* was empty, now not, or vice versa */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 	if (old_acting->primary != new_acting->primary)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 		return true; /* primary changed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 	if (calc_pg_rank(old_acting->primary, old_acting) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 	    calc_pg_rank(new_acting->primary, new_acting))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 	return false; /* same primary (tho replicas may have changed) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) bool ceph_osds_changed(const struct ceph_osds *old_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 		       const struct ceph_osds *new_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 		       bool any_change)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) 	if (primary_changed(old_acting, new_acting))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 	if (any_change && !__osds_equal(old_acting, new_acting))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353)  * Map an object into a PG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355)  * Should only be called with target_oid and target_oloc (as opposed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356)  * base_oid and base_oloc), since tiering isn't taken into account.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 				 const struct ceph_object_id *oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 				 const struct ceph_object_locator *oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 				 struct ceph_pg *raw_pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) 	WARN_ON(pi->id != oloc->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 	if (!oloc->pool_ns) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 		raw_pgid->pool = oloc->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 		raw_pgid->seed = ceph_str_hash(pi->object_hash, oid->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 					     oid->name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 		dout("%s %s -> raw_pgid %llu.%x\n", __func__, oid->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 		     raw_pgid->pool, raw_pgid->seed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 		char stack_buf[256];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) 		char *buf = stack_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 		int nsl = oloc->pool_ns->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 		size_t total = nsl + 1 + oid->name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 		if (total > sizeof(stack_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 			buf = kmalloc(total, GFP_NOIO | __GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 		memcpy(buf, oloc->pool_ns->str, nsl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 		buf[nsl] = '\037';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 		memcpy(buf + nsl + 1, oid->name, oid->name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 		raw_pgid->pool = oloc->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 		raw_pgid->seed = ceph_str_hash(pi->object_hash, buf, total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 		if (buf != stack_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 			kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 		dout("%s %s ns %.*s -> raw_pgid %llu.%x\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 		     oid->name, nsl, oloc->pool_ns->str,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 		     raw_pgid->pool, raw_pgid->seed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 			      const struct ceph_object_id *oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 			      const struct ceph_object_locator *oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 			      struct ceph_pg *raw_pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 	struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 	pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 	if (!pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 		return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 	__ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) EXPORT_SYMBOL(ceph_object_locator_to_pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409)  * Map a raw PG (full precision ps) into an actual PG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) static void raw_pg_to_pg(struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 			 const struct ceph_pg *raw_pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 			 struct ceph_pg *pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 	pgid->pool = raw_pgid->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 	pgid->seed = ceph_stable_mod(raw_pgid->seed, pi->pg_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 				     pi->pg_num_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421)  * Map a raw PG (full precision ps) into a placement ps (placement
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422)  * seed).  Include pool id in that value so that different pools don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423)  * use the same seeds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 			 const struct ceph_pg *raw_pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 	if (pi->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 		/* hash pool id and seed so that pool PGs do not overlap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 		return crush_hash32_2(CRUSH_HASH_RJENKINS1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 				      ceph_stable_mod(raw_pgid->seed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 						      pi->pgp_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 						      pi->pgp_num_mask),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 				      raw_pgid->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 		 * legacy behavior: add ps and pool together.  this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 		 * not a great approach because the PGs from each pool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 		 * will overlap on top of each other: 0.5 == 1.4 ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 		 * 2.3 == ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 		return ceph_stable_mod(raw_pgid->seed, pi->pgp_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 				       pi->pgp_num_mask) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 		       (unsigned)raw_pgid->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)  * Magic value used for a "default" fallback choose_args, used if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450)  * crush_choose_arg_map passed to do_crush() does not exist.  If this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451)  * also doesn't exist, fall back to canonical weights.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) #define CEPH_DEFAULT_CHOOSE_ARGS	-1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 		    int *result, int result_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 		    const __u32 *weight, int weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 		    s64 choose_args_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 	struct crush_choose_arg_map *arg_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 	struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 	int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 	BUG_ON(result_max > CEPH_PG_MAX_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 	arg_map = lookup_choose_arg_map(&map->crush->choose_args,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 					choose_args_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 	if (!arg_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 		arg_map = lookup_choose_arg_map(&map->crush->choose_args,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 						CEPH_DEFAULT_CHOOSE_ARGS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 	work = get_workspace(&map->crush_wsm, map->crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 	r = crush_do_rule(map->crush, ruleno, x, result, result_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) 			  weight, weight_max, work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 			  arg_map ? arg_map->args : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) 	put_workspace(&map->crush_wsm, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) 	return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) static void remove_nonexistent_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 				    struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 				    struct ceph_osds *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 	if (ceph_can_shift_osds(pi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) 		int removed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) 		/* shift left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) 		for (i = 0; i < set->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) 			if (!ceph_osd_exists(osdmap, set->osds[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) 				removed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 			if (removed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 				set->osds[i - removed] = set->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 		set->size -= removed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) 		/* set dne devices to NONE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) 		for (i = 0; i < set->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) 			if (!ceph_osd_exists(osdmap, set->osds[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) 				set->osds[i] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509)  * Calculate raw set (CRUSH output) for given PG and filter out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510)  * nonexistent OSDs.  ->primary is undefined for a raw set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512)  * Placement seed (CRUSH input) is returned through @ppps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 			   struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 			   const struct ceph_pg *raw_pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) 			   struct ceph_osds *raw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 			   u32 *ppps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 	u32 pps = raw_pg_to_pps(pi, raw_pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	int ruleno;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) 	int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 	ceph_osds_init(raw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 	if (ppps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 		*ppps = pps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 	ruleno = crush_find_rule(osdmap->crush, pi->crush_ruleset, pi->type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 				 pi->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) 	if (ruleno < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 		pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 		       pi->id, pi->crush_ruleset, pi->type, pi->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 	if (pi->size > ARRAY_SIZE(raw->osds)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 		pr_err_ratelimited("pool %lld ruleset %d type %d too wide: size %d > %zu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 		       pi->id, pi->crush_ruleset, pi->type, pi->size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 		       ARRAY_SIZE(raw->osds));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 	len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 		       osdmap->osd_weight, osdmap->max_osd, pi->id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 	if (len < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) 		pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) 		       len, ruleno, pi->id, pi->crush_ruleset, pi->type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) 		       pi->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) 	raw->size = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) 	remove_nonexistent_osds(osdmap, pi, raw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) /* apply pg_upmap[_items] mappings */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) static void apply_upmap(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 			const struct ceph_pg *pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 			struct ceph_osds *raw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 	struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) 	pg = lookup_pg_mapping(&osdmap->pg_upmap, pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 	if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 		/* make sure targets aren't marked out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) 		for (i = 0; i < pg->pg_upmap.len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) 			int osd = pg->pg_upmap.osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) 			if (osd != CRUSH_ITEM_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 			    osd < osdmap->max_osd &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 			    osdmap->osd_weight[osd] == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 				/* reject/ignore explicit mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 				return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) 		for (i = 0; i < pg->pg_upmap.len; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) 			raw->osds[i] = pg->pg_upmap.osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) 		raw->size = pg->pg_upmap.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) 		/* check and apply pg_upmap_items, if any */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) 	pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 	if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 		 * Note: this approach does not allow a bidirectional swap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) 		 * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) 		for (i = 0; i < pg->pg_upmap_items.len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) 			int from = pg->pg_upmap_items.from_to[i][0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) 			int to = pg->pg_upmap_items.from_to[i][1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) 			int pos = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) 			bool exists = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) 			/* make sure replacement doesn't already appear */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) 			for (j = 0; j < raw->size; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) 				int osd = raw->osds[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 				if (osd == to) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 					exists = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 					break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 				/* ignore mapping if target is marked out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 				if (osd == from && pos < 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 				    !(to != CRUSH_ITEM_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) 				      to < osdmap->max_osd &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 				      osdmap->osd_weight[to] == 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) 					pos = j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) 			if (!exists && pos >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) 				raw->osds[pos] = to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618)  * Given raw set, calculate up set and up primary.  By definition of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619)  * up set, the result won't contain nonexistent or down OSDs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621)  * This is done in-place - on return @set is the up set.  If it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622)  * empty, ->primary will remain undefined.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) static void raw_to_up_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) 			   struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 			   struct ceph_osds *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) 	/* ->primary is undefined for a raw set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 	BUG_ON(set->primary != -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 	if (ceph_can_shift_osds(pi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) 		int removed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) 		/* shift left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) 		for (i = 0; i < set->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) 			if (ceph_osd_is_down(osdmap, set->osds[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) 				removed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 			if (removed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 				set->osds[i - removed] = set->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 		set->size -= removed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 		if (set->size > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 			set->primary = set->osds[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) 		/* set down/dne devices to NONE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) 		for (i = set->size - 1; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 			if (ceph_osd_is_down(osdmap, set->osds[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) 				set->osds[i] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) 				set->primary = set->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) static void apply_primary_affinity(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) 				   struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) 				   u32 pps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) 				   struct ceph_osds *up)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) 	int pos = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) 	 * Do we have any non-default primary_affinity values for these
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 	 * osds?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) 	if (!osdmap->osd_primary_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 	for (i = 0; i < up->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 		int osd = up->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 		if (osd != CRUSH_ITEM_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 		    osdmap->osd_primary_affinity[osd] !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) 					CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 	if (i == up->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) 	 * Pick the primary.  Feed both the seed (for the pg) and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) 	 * osd into the hash/rng so that a proportional fraction of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) 	 * osd's pgs get rejected as primary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) 	for (i = 0; i < up->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) 		int osd = up->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 		u32 aff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) 		if (osd == CRUSH_ITEM_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 		aff = osdmap->osd_primary_affinity[osd];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) 		if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) 		    (crush_hash32_2(CRUSH_HASH_RJENKINS1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) 				    pps, osd) >> 16) >= aff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) 			 * We chose not to use this primary.  Note it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) 			 * anyway as a fallback in case we don't pick
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) 			 * anyone else, but keep looking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) 			if (pos < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) 				pos = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) 		} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) 			pos = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 	if (pos < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) 	up->primary = up->osds[pos];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) 	if (ceph_can_shift_osds(pi) && pos > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) 		/* move the new primary to the front */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) 		for (i = pos; i > 0; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) 			up->osds[i] = up->osds[i - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 		up->osds[0] = up->primary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728)  * Get pg_temp and primary_temp mappings for given PG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730)  * Note that a PG may have none, only pg_temp, only primary_temp or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731)  * both pg_temp and primary_temp mappings.  This means @temp isn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732)  * always a valid OSD set on return: in the "only primary_temp" case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733)  * @temp will have its ->primary >= 0 but ->size == 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) static void get_temp_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 			  struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) 			  const struct ceph_pg *pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 			  struct ceph_osds *temp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) 	struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) 	ceph_osds_init(temp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) 	/* pg_temp? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 	pg = lookup_pg_mapping(&osdmap->pg_temp, pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 	if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 		for (i = 0; i < pg->pg_temp.len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 			if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) 				if (ceph_can_shift_osds(pi))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) 					continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) 				temp->osds[temp->size++] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) 			} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) 				temp->osds[temp->size++] = pg->pg_temp.osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) 		/* apply pg_temp's primary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) 		for (i = 0; i < temp->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) 			if (temp->osds[i] != CRUSH_ITEM_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) 				temp->primary = temp->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) 				break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 	/* primary_temp? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) 	pg = lookup_pg_mapping(&osdmap->primary_temp, pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 	if (pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 		temp->primary = pg->primary_temp.osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775)  * Map a PG to its acting set as well as its up set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777)  * Acting set is used for data mapping purposes, while up set can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778)  * recorded for detecting interval changes and deciding whether to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779)  * resend a request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) 			       struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) 			       const struct ceph_pg *raw_pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) 			       struct ceph_osds *up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) 			       struct ceph_osds *acting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) 	struct ceph_pg pgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) 	u32 pps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 	WARN_ON(pi->id != raw_pgid->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) 	raw_pg_to_pg(pi, raw_pgid, &pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 	pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 	apply_upmap(osdmap, &pgid, up);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 	raw_to_up_osds(osdmap, pi, up);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 	apply_primary_affinity(osdmap, pi, pps, up);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) 	get_temp_osds(osdmap, pi, &pgid, acting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 	if (!acting->size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 		memcpy(acting->osds, up->osds, up->size * sizeof(up->osds[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 		acting->size = up->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 		if (acting->primary == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 			acting->primary = up->primary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 	WARN_ON(!osds_valid(up) || !osds_valid(acting));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) 			      struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 			      const struct ceph_pg *raw_pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 			      struct ceph_spg *spgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 	struct ceph_pg pgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 	struct ceph_osds up, acting;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 	WARN_ON(pi->id != raw_pgid->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 	raw_pg_to_pg(pi, raw_pgid, &pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 	if (ceph_can_shift_osds(pi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 		spgid->pgid = pgid; /* struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 		spgid->shard = CEPH_SPG_NOSHARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 	ceph_pg_to_up_acting_osds(osdmap, pi, &pgid, &up, &acting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 	for (i = 0; i < acting.size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 		if (acting.osds[i] == acting.primary) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 			spgid->pgid = pgid; /* struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) 			spgid->shard = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) 			return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838)  * Return acting primary for given PG, or -1 if none.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 			      const struct ceph_pg *raw_pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 	struct ceph_osds up, acting;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 	if (!pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) 	ceph_pg_to_up_acting_osds(osdmap, pi, raw_pgid, &up, &acting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 	return acting.primary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) EXPORT_SYMBOL(ceph_pg_to_acting_primary);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) static struct crush_loc_node *alloc_crush_loc(size_t type_name_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) 					      size_t name_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) 	struct crush_loc_node *loc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) 	loc = kmalloc(sizeof(*loc) + type_name_len + name_len + 2, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) 	if (!loc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) 		return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) 	RB_CLEAR_NODE(&loc->cl_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) 	return loc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) static void free_crush_loc(struct crush_loc_node *loc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) 	WARN_ON(!RB_EMPTY_NODE(&loc->cl_node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) 	kfree(loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) static int crush_loc_compare(const struct crush_loc *loc1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) 			     const struct crush_loc *loc2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) 	return strcmp(loc1->cl_type_name, loc2->cl_type_name) ?:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) 	       strcmp(loc1->cl_name, loc2->cl_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) 
/*
 * rbtree helpers for crush_loc_node, keyed by the embedded cl_loc and
 * ordered by crush_loc_compare().  Presumably this is what provides
 * __insert_crush_loc(), lookup_crush_loc() and erase_crush_loc() used
 * in this file -- see the DEFINE_RB_FUNCS2 definition to confirm.
 */
DEFINE_RB_FUNCS2(crush_loc, struct crush_loc_node, cl_loc, crush_loc_compare,
		 RB_BYPTR, const struct crush_loc *, cl_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886)  * Parses a set of <bucket type name>':'<bucket name> pairs separated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887)  * by '|', e.g. "rack:foo1|rack:foo2|datacenter:bar".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889)  * Note that @crush_location is modified by strsep().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) int ceph_parse_crush_location(char *crush_location, struct rb_root *locs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) 	struct crush_loc_node *loc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) 	const char *type_name, *name, *colon;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) 	size_t type_name_len, name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) 	dout("%s '%s'\n", __func__, crush_location);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) 	while ((type_name = strsep(&crush_location, "|"))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) 		colon = strchr(type_name, ':');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) 		if (!colon)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) 		type_name_len = colon - type_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) 		if (type_name_len == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) 		name = colon + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) 		name_len = strlen(name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) 		if (name_len == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) 			return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) 		loc = alloc_crush_loc(type_name_len, name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) 		if (!loc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) 			return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) 		loc->cl_loc.cl_type_name = loc->cl_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) 		memcpy(loc->cl_loc.cl_type_name, type_name, type_name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) 		loc->cl_loc.cl_type_name[type_name_len] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) 		loc->cl_loc.cl_name = loc->cl_data + type_name_len + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) 		memcpy(loc->cl_loc.cl_name, name, name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) 		loc->cl_loc.cl_name[name_len] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) 		if (!__insert_crush_loc(locs, loc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) 			free_crush_loc(loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) 			return -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) 		dout("%s type_name '%s' name '%s'\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) 		     loc->cl_loc.cl_type_name, loc->cl_loc.cl_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) 	struct rb_node *n1 = rb_first(locs1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) 	struct rb_node *n2 = rb_first(locs2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) 	for ( ; n1 && n2; n1 = rb_next(n1), n2 = rb_next(n2)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) 		struct crush_loc_node *loc1 =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) 		    rb_entry(n1, struct crush_loc_node, cl_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) 		struct crush_loc_node *loc2 =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) 		    rb_entry(n2, struct crush_loc_node, cl_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) 		ret = crush_loc_compare(&loc1->cl_loc, &loc2->cl_loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) 		if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) 			return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) 	if (!n1 && n2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) 		return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) 	if (n1 && !n2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) void ceph_clear_crush_locs(struct rb_root *locs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) 	while (!RB_EMPTY_ROOT(locs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) 		struct crush_loc_node *loc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) 		    rb_entry(rb_first(locs), struct crush_loc_node, cl_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) 		erase_crush_loc(locs, loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) 		free_crush_loc(loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) 
/*
 * Returns true if @name matches [a-zA-Z0-9-_.]+, i.e. it is non-empty
 * and consists only of ASCII letters, digits, '-', '_' and '.'.
 */
static bool is_valid_crush_name(const char *name)
{
	const char *p;

	/* "+" in the pattern: the empty string doesn't qualify */
	if (*name == '\0')
		return false;

	for (p = name; *p != '\0'; p++) {
		char ch = *p;

		if (ch >= 'a' && ch <= 'z')
			continue;
		if (ch >= 'A' && ch <= 'Z')
			continue;
		if (ch >= '0' && ch <= '9')
			continue;
		if (ch == '-' || ch == '_' || ch == '.')
			continue;

		return false;
	}

	return true;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988)  * Gets the parent of an item.  Returns its id (<0 because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989)  * parent is always a bucket), type id (>0 for the same reason,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990)  * via @parent_type_id) and location (via @parent_loc).  If no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991)  * parent, returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993)  * Does a linear search, as there are no parent pointers of any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994)  * kind.  Note that the result is ambigous for items that occur
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995)  * multiple times in the map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) static int get_immediate_parent(struct crush_map *c, int id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) 				u16 *parent_type_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) 				struct crush_loc *parent_loc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) 	struct crush_bucket *b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) 	struct crush_name_node *type_cn, *cn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) 	int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) 	for (i = 0; i < c->max_buckets; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) 		b = c->buckets[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) 		if (!b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) 		/* ignore per-class shadow hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) 		cn = lookup_crush_name(&c->names, b->id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) 		if (!cn || !is_valid_crush_name(cn->cn_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) 		for (j = 0; j < b->size; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) 			if (b->items[j] != id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) 			*parent_type_id = b->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) 			type_cn = lookup_crush_name(&c->type_names, b->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) 			parent_loc->cl_type_name = type_cn->cn_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) 			parent_loc->cl_name = cn->cn_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) 			return b->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) 	return 0;  /* no parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031)  * Calculates the locality/distance from an item to a client
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032)  * location expressed in terms of CRUSH hierarchy as a set of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033)  * (bucket type name, bucket name) pairs.  Specifically, looks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034)  * for the lowest-valued bucket type for which the location of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035)  * @id matches one of the locations in @locs, so for standard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036)  * bucket types (host = 1, rack = 3, datacenter = 8, zone = 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037)  * a matching host is closer than a matching rack and a matching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038)  * data center is closer than a matching zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040)  * Specifying multiple locations (a "multipath" location) such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041)  * as "rack=foo1 rack=foo2 datacenter=bar" is allowed -- @locs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042)  * is a multimap.  The locality will be:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044)  * - 3 for OSDs in racks foo1 and foo2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045)  * - 8 for OSDs in data center bar
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046)  * - -1 for all other OSDs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048)  * The lowest possible bucket type is 1, so the best locality
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049)  * for an OSD is 1 (i.e. a matching host).  Locality 0 would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050)  * the OSD itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) int ceph_get_crush_locality(struct ceph_osdmap *osdmap, int id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) 			    struct rb_root *locs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) 	struct crush_loc loc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) 	u16 type_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) 	 * Instead of repeated get_immediate_parent() calls,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) 	 * the location of @id could be obtained with a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) 	 * depth-first traversal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) 	for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) 		id = get_immediate_parent(osdmap->crush, id, &type_id, &loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) 		if (id >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) 			return -1;  /* not local */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) 		if (lookup_crush_loc(locs, &loc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) 			return type_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) }