// SPDX-License-Identifier: GPL-2.0

#include <linux/ceph/ceph_debug.h>

#include <linux/module.h>
#include <linux/slab.h>

#include <linux/ceph/libceph.h>
#include <linux/ceph/osdmap.h>
#include <linux/ceph/decode.h>
#include <linux/crush/hash.h>
#include <linux/crush/mapper.h>

char *ceph_osdmap_state_str(char *str, int len, u32 state)
{
	if (!len)
		return str;

	if ((state & CEPH_OSD_EXISTS) && (state & CEPH_OSD_UP))
		snprintf(str, len, "exists, up");
	else if (state & CEPH_OSD_EXISTS)
		snprintf(str, len, "exists");
	else if (state & CEPH_OSD_UP)
		snprintf(str, len, "up");
	else
		snprintf(str, len, "doesn't exist");

	return str;
}

/* maps */

static int calc_bits_of(unsigned int t)
{
	int b = 0;
	while (t) {
		t = t >> 1;
		b++;
	}
	return b;
}

/*
 * The foo_mask is 2^n - 1, where 2^n is the smallest power of two >= foo.
 */
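/*
 * For example (illustrative values): pg_num = 10 gives
 * calc_bits_of(9) = 4 and pg_num_mask = (1 << 4) - 1 = 15, while
 * pg_num = 8 gives calc_bits_of(7) = 3 and pg_num_mask = 7.
 */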
static void calc_pg_masks(struct ceph_pg_pool_info *pi)
{
	pi->pg_num_mask = (1 << calc_bits_of(pi->pg_num-1)) - 1;
	pi->pgp_num_mask = (1 << calc_bits_of(pi->pgp_num-1)) - 1;
}

/*
 * decode crush map
 */
static int crush_decode_uniform_bucket(void **p, void *end,
				       struct crush_bucket_uniform *b)
{
	dout("crush_decode_uniform_bucket %p to %p\n", *p, end);
	ceph_decode_need(p, end, (1+b->h.size) * sizeof(u32), bad);
	b->item_weight = ceph_decode_32(p);
	return 0;
bad:
	return -EINVAL;
}

static int crush_decode_list_bucket(void **p, void *end,
				    struct crush_bucket_list *b)
{
	int j;
	dout("crush_decode_list_bucket %p to %p\n", *p, end);
	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->item_weights == NULL)
		return -ENOMEM;
	b->sum_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->sum_weights == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
	for (j = 0; j < b->h.size; j++) {
		b->item_weights[j] = ceph_decode_32(p);
		b->sum_weights[j] = ceph_decode_32(p);
	}
	return 0;
bad:
	return -EINVAL;
}

static int crush_decode_tree_bucket(void **p, void *end,
				    struct crush_bucket_tree *b)
{
	int j;
	dout("crush_decode_tree_bucket %p to %p\n", *p, end);
	ceph_decode_8_safe(p, end, b->num_nodes, bad);
	b->node_weights = kcalloc(b->num_nodes, sizeof(u32), GFP_NOFS);
	if (b->node_weights == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, b->num_nodes * sizeof(u32), bad);
	for (j = 0; j < b->num_nodes; j++)
		b->node_weights[j] = ceph_decode_32(p);
	return 0;
bad:
	return -EINVAL;
}

static int crush_decode_straw_bucket(void **p, void *end,
				     struct crush_bucket_straw *b)
{
	int j;
	dout("crush_decode_straw_bucket %p to %p\n", *p, end);
	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->item_weights == NULL)
		return -ENOMEM;
	b->straws = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->straws == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, 2 * b->h.size * sizeof(u32), bad);
	for (j = 0; j < b->h.size; j++) {
		b->item_weights[j] = ceph_decode_32(p);
		b->straws[j] = ceph_decode_32(p);
	}
	return 0;
bad:
	return -EINVAL;
}

static int crush_decode_straw2_bucket(void **p, void *end,
				      struct crush_bucket_straw2 *b)
{
	int j;
	dout("crush_decode_straw2_bucket %p to %p\n", *p, end);
	b->item_weights = kcalloc(b->h.size, sizeof(u32), GFP_NOFS);
	if (b->item_weights == NULL)
		return -ENOMEM;
	ceph_decode_need(p, end, b->h.size * sizeof(u32), bad);
	for (j = 0; j < b->h.size; j++)
		b->item_weights[j] = ceph_decode_32(p);
	return 0;
bad:
	return -EINVAL;
}

struct crush_name_node {
	struct rb_node cn_node;
	int cn_id;
	char cn_name[];
};

static struct crush_name_node *alloc_crush_name(size_t name_len)
{
	struct crush_name_node *cn;

	cn = kmalloc(sizeof(*cn) + name_len + 1, GFP_NOIO);
	if (!cn)
		return NULL;

	RB_CLEAR_NODE(&cn->cn_node);
	return cn;
}

static void free_crush_name(struct crush_name_node *cn)
{
	WARN_ON(!RB_EMPTY_NODE(&cn->cn_node));

	kfree(cn);
}

DEFINE_RB_FUNCS(crush_name, struct crush_name_node, cn_id, cn_node)

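/*
 * A name map is encoded as a u32 entry count followed by, for each
 * entry, a 32-bit id, a u32 name length and the (unterminated) name
 * bytes.  Each entry is copied into a NUL-terminated crush_name_node
 * and inserted into the id-keyed rbtree using the helpers generated
 * by DEFINE_RB_FUNCS(crush_name, ...) above.
 */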
static int decode_crush_names(void **p, void *end, struct rb_root *root)
{
	u32 n;

	ceph_decode_32_safe(p, end, n, e_inval);
	while (n--) {
		struct crush_name_node *cn;
		int id;
		u32 name_len;

		ceph_decode_32_safe(p, end, id, e_inval);
		ceph_decode_32_safe(p, end, name_len, e_inval);
		ceph_decode_need(p, end, name_len, e_inval);

		cn = alloc_crush_name(name_len);
		if (!cn)
			return -ENOMEM;

		cn->cn_id = id;
		memcpy(cn->cn_name, *p, name_len);
		cn->cn_name[name_len] = '\0';
		*p += name_len;

		if (!__insert_crush_name(root, cn)) {
			free_crush_name(cn);
			return -EEXIST;
		}
	}

	return 0;

e_inval:
	return -EINVAL;
}

void clear_crush_names(struct rb_root *root)
{
	while (!RB_EMPTY_ROOT(root)) {
		struct crush_name_node *cn =
		    rb_entry(rb_first(root), struct crush_name_node, cn_node);

		erase_crush_name(root, cn);
		free_crush_name(cn);
	}
}

static struct crush_choose_arg_map *alloc_choose_arg_map(void)
{
	struct crush_choose_arg_map *arg_map;

	arg_map = kzalloc(sizeof(*arg_map), GFP_NOIO);
	if (!arg_map)
		return NULL;

	RB_CLEAR_NODE(&arg_map->node);
	return arg_map;
}

static void free_choose_arg_map(struct crush_choose_arg_map *arg_map)
{
	if (arg_map) {
		int i, j;

		WARN_ON(!RB_EMPTY_NODE(&arg_map->node));

		for (i = 0; i < arg_map->size; i++) {
			struct crush_choose_arg *arg = &arg_map->args[i];

			for (j = 0; j < arg->weight_set_size; j++)
				kfree(arg->weight_set[j].weights);
			kfree(arg->weight_set);
			kfree(arg->ids);
		}
		kfree(arg_map->args);
		kfree(arg_map);
	}
}

DEFINE_RB_FUNCS(choose_arg_map, struct crush_choose_arg_map, choose_args_index,
		node);

void clear_choose_args(struct crush_map *c)
{
	while (!RB_EMPTY_ROOT(&c->choose_args)) {
		struct crush_choose_arg_map *arg_map =
		    rb_entry(rb_first(&c->choose_args),
			     struct crush_choose_arg_map, node);

		erase_choose_arg_map(&c->choose_args, arg_map);
		free_choose_arg_map(arg_map);
	}
}

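/*
 * Decode a u32 count followed by that many u32s into a freshly
 * allocated array.  A zero-length array decodes to a NULL pointer
 * with *plen == 0; failures come back as ERR_PTR(-EINVAL) or
 * ERR_PTR(-ENOMEM).
 */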
static u32 *decode_array_32_alloc(void **p, void *end, u32 *plen)
{
	u32 *a = NULL;
	u32 len;
	int ret;

	ceph_decode_32_safe(p, end, len, e_inval);
	if (len) {
		u32 i;

		a = kmalloc_array(len, sizeof(u32), GFP_NOIO);
		if (!a) {
			ret = -ENOMEM;
			goto fail;
		}

		ceph_decode_need(p, end, len * sizeof(u32), e_inval);
		for (i = 0; i < len; i++)
			a[i] = ceph_decode_32(p);
	}

	*plen = len;
	return a;

e_inval:
	ret = -EINVAL;
fail:
	kfree(a);
	return ERR_PTR(ret);
}

/*
 * Assumes @arg is zero-initialized.
 */
static int decode_choose_arg(void **p, void *end, struct crush_choose_arg *arg)
{
	int ret;

	ceph_decode_32_safe(p, end, arg->weight_set_size, e_inval);
	if (arg->weight_set_size) {
		u32 i;

		arg->weight_set = kmalloc_array(arg->weight_set_size,
						sizeof(*arg->weight_set),
						GFP_NOIO);
		if (!arg->weight_set)
			return -ENOMEM;

		for (i = 0; i < arg->weight_set_size; i++) {
			struct crush_weight_set *w = &arg->weight_set[i];

			w->weights = decode_array_32_alloc(p, end, &w->size);
			if (IS_ERR(w->weights)) {
				ret = PTR_ERR(w->weights);
				w->weights = NULL;
				return ret;
			}
		}
	}

	arg->ids = decode_array_32_alloc(p, end, &arg->ids_size);
	if (IS_ERR(arg->ids)) {
		ret = PTR_ERR(arg->ids);
		arg->ids = NULL;
		return ret;
	}

	return 0;

e_inval:
	return -EINVAL;
}

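/*
 * choose_args is a u32 count of choose_arg maps.  Each map carries a
 * u64 choose_args_index and a u32 count of per-bucket entries; every
 * entry is a u32 bucket index followed by the crush_choose_arg
 * (weight sets and ids) decoded above.
 */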
static int decode_choose_args(void **p, void *end, struct crush_map *c)
{
	struct crush_choose_arg_map *arg_map = NULL;
	u32 num_choose_arg_maps, num_buckets;
	int ret;

	ceph_decode_32_safe(p, end, num_choose_arg_maps, e_inval);
	while (num_choose_arg_maps--) {
		arg_map = alloc_choose_arg_map();
		if (!arg_map) {
			ret = -ENOMEM;
			goto fail;
		}

		ceph_decode_64_safe(p, end, arg_map->choose_args_index,
				    e_inval);
		arg_map->size = c->max_buckets;
		arg_map->args = kcalloc(arg_map->size, sizeof(*arg_map->args),
					GFP_NOIO);
		if (!arg_map->args) {
			ret = -ENOMEM;
			goto fail;
		}

		ceph_decode_32_safe(p, end, num_buckets, e_inval);
		while (num_buckets--) {
			struct crush_choose_arg *arg;
			u32 bucket_index;

			ceph_decode_32_safe(p, end, bucket_index, e_inval);
			if (bucket_index >= arg_map->size)
				goto e_inval;

			arg = &arg_map->args[bucket_index];
			ret = decode_choose_arg(p, end, arg);
			if (ret)
				goto fail;

			if (arg->ids_size &&
			    arg->ids_size != c->buckets[bucket_index]->size)
				goto e_inval;
		}

		insert_choose_arg_map(&c->choose_args, arg_map);
	}

	return 0;

e_inval:
	ret = -EINVAL;
fail:
	free_choose_arg_map(arg_map);
	return ret;
}

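/*
 * Illustrative sizing for the workspace computed below: assuming all
 * buckets take the default (permutation-based) case,
 *
 *   working_size = sizeof(struct crush_work)
 *                + max_buckets * sizeof(struct crush_work_bucket *)
 *                + sum over buckets of (sizeof(struct crush_work_bucket)
 *                                       + bucket->size * sizeof(__u32))
 */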
static void crush_finalize(struct crush_map *c)
{
	__s32 b;

	/* Space for the array of pointers to per-bucket workspace */
	c->working_size = sizeof(struct crush_work) +
	    c->max_buckets * sizeof(struct crush_work_bucket *);

	for (b = 0; b < c->max_buckets; b++) {
		if (!c->buckets[b])
			continue;

		switch (c->buckets[b]->alg) {
		default:
			/*
			 * The base case, permutation variables and
			 * the pointer to the permutation array.
			 */
			c->working_size += sizeof(struct crush_work_bucket);
			break;
		}
		/* Every bucket has a permutation array. */
		c->working_size += c->buckets[b]->size * sizeof(__u32);
	}
}

static struct crush_map *crush_decode(void *pbyval, void *end)
{
	struct crush_map *c;
	int err;
	int i, j;
	void **p = &pbyval;
	void *start = pbyval;
	u32 magic;

	dout("crush_decode %p to %p len %d\n", *p, end, (int)(end - *p));

	c = kzalloc(sizeof(*c), GFP_NOFS);
	if (c == NULL)
		return ERR_PTR(-ENOMEM);

	c->type_names = RB_ROOT;
	c->names = RB_ROOT;
	c->choose_args = RB_ROOT;

	/* set tunables to default values */
	c->choose_local_tries = 2;
	c->choose_local_fallback_tries = 5;
	c->choose_total_tries = 19;
	c->chooseleaf_descend_once = 0;

	ceph_decode_need(p, end, 4*sizeof(u32), bad);
	magic = ceph_decode_32(p);
	if (magic != CRUSH_MAGIC) {
		pr_err("crush_decode magic %x != current %x\n",
		       (unsigned int)magic, (unsigned int)CRUSH_MAGIC);
		goto bad;
	}
	c->max_buckets = ceph_decode_32(p);
	c->max_rules = ceph_decode_32(p);
	c->max_devices = ceph_decode_32(p);

	c->buckets = kcalloc(c->max_buckets, sizeof(*c->buckets), GFP_NOFS);
	if (c->buckets == NULL)
		goto badmem;
	c->rules = kcalloc(c->max_rules, sizeof(*c->rules), GFP_NOFS);
	if (c->rules == NULL)
		goto badmem;

	/* buckets */
	for (i = 0; i < c->max_buckets; i++) {
		int size = 0;
		u32 alg;
		struct crush_bucket *b;

		ceph_decode_32_safe(p, end, alg, bad);
		if (alg == 0) {
			c->buckets[i] = NULL;
			continue;
		}
		dout("crush_decode bucket %d off %x %p to %p\n",
		     i, (int)(*p-start), *p, end);

		switch (alg) {
		case CRUSH_BUCKET_UNIFORM:
			size = sizeof(struct crush_bucket_uniform);
			break;
		case CRUSH_BUCKET_LIST:
			size = sizeof(struct crush_bucket_list);
			break;
		case CRUSH_BUCKET_TREE:
			size = sizeof(struct crush_bucket_tree);
			break;
		case CRUSH_BUCKET_STRAW:
			size = sizeof(struct crush_bucket_straw);
			break;
		case CRUSH_BUCKET_STRAW2:
			size = sizeof(struct crush_bucket_straw2);
			break;
		default:
			goto bad;
		}
		BUG_ON(size == 0);
		b = c->buckets[i] = kzalloc(size, GFP_NOFS);
		if (b == NULL)
			goto badmem;

		ceph_decode_need(p, end, 4*sizeof(u32), bad);
		b->id = ceph_decode_32(p);
		b->type = ceph_decode_16(p);
		b->alg = ceph_decode_8(p);
		b->hash = ceph_decode_8(p);
		b->weight = ceph_decode_32(p);
		b->size = ceph_decode_32(p);

		dout("crush_decode bucket size %d off %x %p to %p\n",
		     b->size, (int)(*p-start), *p, end);

		b->items = kcalloc(b->size, sizeof(__s32), GFP_NOFS);
		if (b->items == NULL)
			goto badmem;

		ceph_decode_need(p, end, b->size*sizeof(u32), bad);
		for (j = 0; j < b->size; j++)
			b->items[j] = ceph_decode_32(p);

		switch (b->alg) {
		case CRUSH_BUCKET_UNIFORM:
			err = crush_decode_uniform_bucket(p, end,
				  (struct crush_bucket_uniform *)b);
			if (err < 0)
				goto fail;
			break;
		case CRUSH_BUCKET_LIST:
			err = crush_decode_list_bucket(p, end,
			       (struct crush_bucket_list *)b);
			if (err < 0)
				goto fail;
			break;
		case CRUSH_BUCKET_TREE:
			err = crush_decode_tree_bucket(p, end,
				(struct crush_bucket_tree *)b);
			if (err < 0)
				goto fail;
			break;
		case CRUSH_BUCKET_STRAW:
			err = crush_decode_straw_bucket(p, end,
				(struct crush_bucket_straw *)b);
			if (err < 0)
				goto fail;
			break;
		case CRUSH_BUCKET_STRAW2:
			err = crush_decode_straw2_bucket(p, end,
				(struct crush_bucket_straw2 *)b);
			if (err < 0)
				goto fail;
			break;
		}
	}

	/* rules */
	dout("rule vec is %p\n", c->rules);
	for (i = 0; i < c->max_rules; i++) {
		u32 yes;
		struct crush_rule *r;

		ceph_decode_32_safe(p, end, yes, bad);
		if (!yes) {
			dout("crush_decode NO rule %d off %x %p to %p\n",
			     i, (int)(*p-start), *p, end);
			c->rules[i] = NULL;
			continue;
		}

		dout("crush_decode rule %d off %x %p to %p\n",
		     i, (int)(*p-start), *p, end);

		/* len */
		ceph_decode_32_safe(p, end, yes, bad);
#if BITS_PER_LONG == 32
		if (yes > (ULONG_MAX - sizeof(*r))
			  / sizeof(struct crush_rule_step))
			goto bad;
#endif
		r = kmalloc(struct_size(r, steps, yes), GFP_NOFS);
		c->rules[i] = r;
		if (r == NULL)
			goto badmem;
		dout(" rule %d is at %p\n", i, r);
		r->len = yes;
		ceph_decode_copy_safe(p, end, &r->mask, 4, bad); /* 4 u8's */
		ceph_decode_need(p, end, r->len*3*sizeof(u32), bad);
		for (j = 0; j < r->len; j++) {
			r->steps[j].op = ceph_decode_32(p);
			r->steps[j].arg1 = ceph_decode_32(p);
			r->steps[j].arg2 = ceph_decode_32(p);
		}
	}

	err = decode_crush_names(p, end, &c->type_names);
	if (err)
		goto fail;

	err = decode_crush_names(p, end, &c->names);
	if (err)
		goto fail;

	ceph_decode_skip_map(p, end, 32, string, bad); /* rule_name_map */

	/* tunables */
	ceph_decode_need(p, end, 3*sizeof(u32), done);
	c->choose_local_tries = ceph_decode_32(p);
	c->choose_local_fallback_tries = ceph_decode_32(p);
	c->choose_total_tries = ceph_decode_32(p);
	dout("crush decode tunable choose_local_tries = %d\n",
	     c->choose_local_tries);
	dout("crush decode tunable choose_local_fallback_tries = %d\n",
	     c->choose_local_fallback_tries);
	dout("crush decode tunable choose_total_tries = %d\n",
	     c->choose_total_tries);

	ceph_decode_need(p, end, sizeof(u32), done);
	c->chooseleaf_descend_once = ceph_decode_32(p);
	dout("crush decode tunable chooseleaf_descend_once = %d\n",
	     c->chooseleaf_descend_once);

	ceph_decode_need(p, end, sizeof(u8), done);
	c->chooseleaf_vary_r = ceph_decode_8(p);
	dout("crush decode tunable chooseleaf_vary_r = %d\n",
	     c->chooseleaf_vary_r);

	/* skip straw_calc_version, allowed_bucket_algs */
	ceph_decode_need(p, end, sizeof(u8) + sizeof(u32), done);
	*p += sizeof(u8) + sizeof(u32);

	ceph_decode_need(p, end, sizeof(u8), done);
	c->chooseleaf_stable = ceph_decode_8(p);
	dout("crush decode tunable chooseleaf_stable = %d\n",
	     c->chooseleaf_stable);

	if (*p != end) {
		/* class_map */
		ceph_decode_skip_map(p, end, 32, 32, bad);
		/* class_name */
		ceph_decode_skip_map(p, end, 32, string, bad);
		/* class_bucket */
		ceph_decode_skip_map_of_map(p, end, 32, 32, 32, bad);
	}

	if (*p != end) {
		err = decode_choose_args(p, end, c);
		if (err)
			goto fail;
	}

done:
	crush_finalize(c);
	dout("crush_decode success\n");
	return c;

badmem:
	err = -ENOMEM;
fail:
	dout("crush_decode fail %d\n", err);
	crush_destroy(c);
	return ERR_PTR(err);

bad:
	err = -EINVAL;
	goto fail;
}

int ceph_pg_compare(const struct ceph_pg *lhs, const struct ceph_pg *rhs)
{
	if (lhs->pool < rhs->pool)
		return -1;
	if (lhs->pool > rhs->pool)
		return 1;
	if (lhs->seed < rhs->seed)
		return -1;
	if (lhs->seed > rhs->seed)
		return 1;

	return 0;
}

int ceph_spg_compare(const struct ceph_spg *lhs, const struct ceph_spg *rhs)
{
	int ret;

	ret = ceph_pg_compare(&lhs->pgid, &rhs->pgid);
	if (ret)
		return ret;

	if (lhs->shard < rhs->shard)
		return -1;
	if (lhs->shard > rhs->shard)
		return 1;

	return 0;
}

static struct ceph_pg_mapping *alloc_pg_mapping(size_t payload_len)
{
	struct ceph_pg_mapping *pg;

	pg = kmalloc(sizeof(*pg) + payload_len, GFP_NOIO);
	if (!pg)
		return NULL;

	RB_CLEAR_NODE(&pg->node);
	return pg;
}

static void free_pg_mapping(struct ceph_pg_mapping *pg)
{
	WARN_ON(!RB_EMPTY_NODE(&pg->node));

	kfree(pg);
}

/*
 * rbtree of pg_mapping for handling pg_temp (explicit mapping of pgid
 * to a set of osds) and primary_temp (explicit primary setting)
 */
DEFINE_RB_FUNCS2(pg_mapping, struct ceph_pg_mapping, pgid, ceph_pg_compare,
		 RB_BYPTR, const struct ceph_pg *, node)

/*
 * rbtree of pg pool info
 */
DEFINE_RB_FUNCS(pg_pool, struct ceph_pg_pool_info, id, node)

struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map, u64 id)
{
	return lookup_pg_pool(&map->pg_pools, id);
}

const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id)
{
	struct ceph_pg_pool_info *pi;

	if (id == CEPH_NOPOOL)
		return NULL;

	if (WARN_ON_ONCE(id > (u64) INT_MAX))
		return NULL;

	pi = lookup_pg_pool(&map->pg_pools, id);
	return pi ? pi->name : NULL;
}
EXPORT_SYMBOL(ceph_pg_pool_name_by_id);

int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name)
{
	struct rb_node *rbp;

	for (rbp = rb_first(&map->pg_pools); rbp; rbp = rb_next(rbp)) {
		struct ceph_pg_pool_info *pi =
			rb_entry(rbp, struct ceph_pg_pool_info, node);
		if (pi->name && strcmp(pi->name, name) == 0)
			return pi->id;
	}
	return -ENOENT;
}
EXPORT_SYMBOL(ceph_pg_poolid_by_name);

u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id)
{
	struct ceph_pg_pool_info *pi;

	pi = lookup_pg_pool(&map->pg_pools, id);
	return pi ? pi->flags : 0;
}
EXPORT_SYMBOL(ceph_pg_pool_flags);

static void __remove_pg_pool(struct rb_root *root, struct ceph_pg_pool_info *pi)
{
	erase_pg_pool(root, pi);
	kfree(pi->name);
	kfree(pi);
}

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) static int decode_pool(void **p, void *end, struct ceph_pg_pool_info *pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) u8 ev, cv;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) unsigned len, num;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) void *pool_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) ceph_decode_need(p, end, 2 + 4, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) ev = ceph_decode_8(p); /* encoding version */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) cv = ceph_decode_8(p); /* compat version */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) if (ev < 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) pr_warn("got v %d < 5 cv %d of ceph_pg_pool\n", ev, cv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) if (cv > 9) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) pr_warn("got v %d cv %d > 9 of ceph_pg_pool\n", ev, cv);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) ceph_decode_need(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) pool_end = *p + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) pi->type = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) pi->size = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) pi->crush_ruleset = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) pi->object_hash = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) pi->pg_num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) pi->pgp_num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) *p += 4 + 4; /* skip lpg* */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) *p += 4; /* skip last_change */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) *p += 8 + 4; /* skip snap_seq, snap_epoch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) /* skip snaps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) while (num--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) *p += 8; /* snapid key */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) *p += 1 + 1; /* versions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) /* skip removed_snaps */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) *p += num * (8 + 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) *p += 8; /* skip auid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) pi->flags = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) *p += 4; /* skip crash_replay_interval */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) if (ev >= 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) pi->min_size = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) pi->min_size = pi->size - pi->size / 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) if (ev >= 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) *p += 8 + 8; /* skip quota_max_* */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) if (ev >= 9) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) /* skip tiers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) *p += num * 8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) *p += 8; /* skip tier_of */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) *p += 1; /* skip cache_mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) pi->read_tier = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) pi->write_tier = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) pi->read_tier = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) pi->write_tier = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (ev >= 10) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) /* skip properties */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) num = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) while (num--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) *p += len; /* key */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) *p += len; /* val */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) if (ev >= 11) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) /* skip hit_set_params */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) *p += 1 + 1; /* versions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) *p += 4; /* skip hit_set_period */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) *p += 4; /* skip hit_set_count */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) if (ev >= 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) *p += 4; /* skip stripe_width */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (ev >= 13) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) *p += 8; /* skip target_max_bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) *p += 8; /* skip target_max_objects */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) *p += 4; /* skip cache_target_dirty_ratio_micro */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) *p += 4; /* skip cache_target_full_ratio_micro */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) *p += 4; /* skip cache_min_flush_age */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) *p += 4; /* skip cache_min_evict_age */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) if (ev >= 14) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) /* skip erasure_code_profile */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) * last_force_op_resend_preluminous; will be overridden if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * map was encoded with RESEND_ON_SPLIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) if (ev >= 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) pi->last_force_request_resend = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) pi->last_force_request_resend = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) if (ev >= 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) *p += 4; /* skip min_read_recency_for_promote */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) if (ev >= 17)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) *p += 8; /* skip expected_num_objects */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) if (ev >= 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) *p += 4; /* skip cache_target_dirty_high_ratio_micro */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) if (ev >= 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) *p += 4; /* skip min_write_recency_for_promote */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) if (ev >= 21)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) *p += 1; /* skip use_gmt_hitset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) if (ev >= 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) *p += 1; /* skip fast_read */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) if (ev >= 23) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) *p += 4; /* skip hit_set_grade_decay_rate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) *p += 4; /* skip hit_set_search_last_n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) if (ev >= 24) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) /* skip opts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) *p += 1 + 1; /* versions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) if (ev >= 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) pi->last_force_request_resend = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) /* ignore the rest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) *p = pool_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) calc_pg_masks(pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
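/*
 * Decode the pool_name map (pool id -> name) and attach each name to the
 * corresponding pool already present in the map; names for pools we don't
 * know about are simply skipped over.
 */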
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) static int decode_pool_names(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) u32 num, len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) u64 pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) ceph_decode_32_safe(p, end, num, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) dout(" %d pool names\n", num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) while (num--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) ceph_decode_64_safe(p, end, pool, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) ceph_decode_32_safe(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) dout(" pool %llu len %d\n", pool, len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) ceph_decode_need(p, end, len, bad);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) pi = lookup_pg_pool(&map->pg_pools, pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) if (pi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) char *name = kstrndup(*p, len, GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) if (!name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) kfree(pi->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) pi->name = name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) dout(" name is %s\n", pi->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) * CRUSH workspaces
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) * workspace_manager framework borrowed from fs/btrfs/compression.c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) * Two simplifications: there is only one type of workspace and there
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) * is always at least one workspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) static struct crush_work *alloc_workspace(const struct crush_map *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) size_t work_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) WARN_ON(!c->working_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) work_size = crush_work_size(c, CEPH_PG_MAX_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) dout("%s work_size %zu bytes\n", __func__, work_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) work = ceph_kvmalloc(work_size, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) if (!work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) INIT_LIST_HEAD(&work->item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) crush_init_workspace(c, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) return work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) static void free_workspace(struct crush_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) WARN_ON(!list_empty(&work->item));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) kvfree(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) static void init_workspace_manager(struct workspace_manager *wsm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) INIT_LIST_HEAD(&wsm->idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) spin_lock_init(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) atomic_set(&wsm->total_ws, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) wsm->free_ws = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) init_waitqueue_head(&wsm->ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) static void add_initial_workspace(struct workspace_manager *wsm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) struct crush_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) WARN_ON(!list_empty(&wsm->idle_ws));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) list_add(&work->item, &wsm->idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) atomic_set(&wsm->total_ws, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) wsm->free_ws = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) static void cleanup_workspace_manager(struct workspace_manager *wsm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) while (!list_empty(&wsm->idle_ws)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) work = list_first_entry(&wsm->idle_ws, struct crush_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) list_del_init(&work->item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) free_workspace(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) atomic_set(&wsm->total_ws, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) wsm->free_ws = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * Finds an available workspace or allocates a new one. If it's not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * possible to allocate a new one, waits until there is one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) static struct crush_work *get_workspace(struct workspace_manager *wsm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) const struct crush_map *c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) int cpus = num_online_cpus();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) spin_lock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) if (!list_empty(&wsm->idle_ws)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) work = list_first_entry(&wsm->idle_ws, struct crush_work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) list_del_init(&work->item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) wsm->free_ws--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) return work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) }
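/*
 * Cap the number of workspaces at roughly the number of online CPUs:
 * beyond that, sleep until one is put back instead of allocating more.
 */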
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) if (atomic_read(&wsm->total_ws) > cpus) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) DEFINE_WAIT(wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) prepare_to_wait(&wsm->ws_wait, &wait, TASK_UNINTERRUPTIBLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) if (atomic_read(&wsm->total_ws) > cpus && !wsm->free_ws)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) schedule();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) finish_wait(&wsm->ws_wait, &wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) atomic_inc(&wsm->total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) work = alloc_workspace(c);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) if (!work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) atomic_dec(&wsm->total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) wake_up(&wsm->ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) * Do not return the error but go back to waiting. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) * have the initial workspace and the CRUSH computation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) * time is bounded so we will get it eventually.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) WARN_ON(atomic_read(&wsm->total_ws) < 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) goto again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) return work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) * Puts a workspace back on the list or frees it if we have enough
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) * idle ones sitting around.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) static void put_workspace(struct workspace_manager *wsm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) struct crush_work *work)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) spin_lock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) if (wsm->free_ws <= num_online_cpus()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) list_add(&work->item, &wsm->idle_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) wsm->free_ws++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) goto wake;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) spin_unlock(&wsm->ws_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) free_workspace(work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) atomic_dec(&wsm->total_ws);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) wake:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) if (wq_has_sleeper(&wsm->ws_wait))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) wake_up(&wsm->ws_wait);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) }
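
/*
 * Callers pair get_workspace() with put_workspace() around the
 * crush_do_rule() call so that concurrent mappings each get a private
 * scratch area.
 */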
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * osd map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) */
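/*
 * Allocate an empty osdmap.  The crush map and its initial workspace are
 * installed separately, via osdmap_set_crush().
 */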
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) struct ceph_osdmap *ceph_osdmap_alloc(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) struct ceph_osdmap *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) map = kzalloc(sizeof(*map), GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) if (!map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) map->pg_pools = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) map->pool_max = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) map->pg_temp = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) map->primary_temp = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) map->pg_upmap = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) map->pg_upmap_items = RB_ROOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) init_workspace_manager(&map->crush_wsm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) return map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) void ceph_osdmap_destroy(struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) dout("osdmap_destroy %p\n", map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) if (map->crush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) crush_destroy(map->crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) cleanup_workspace_manager(&map->crush_wsm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) while (!RB_EMPTY_ROOT(&map->pg_temp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) struct ceph_pg_mapping *pg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) rb_entry(rb_first(&map->pg_temp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) struct ceph_pg_mapping, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) erase_pg_mapping(&map->pg_temp, pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) free_pg_mapping(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) while (!RB_EMPTY_ROOT(&map->primary_temp)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) struct ceph_pg_mapping *pg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) rb_entry(rb_first(&map->primary_temp),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) struct ceph_pg_mapping, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) erase_pg_mapping(&map->primary_temp, pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) free_pg_mapping(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) struct ceph_pg_mapping *pg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) rb_entry(rb_first(&map->pg_upmap),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) struct ceph_pg_mapping, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) rb_erase(&pg->node, &map->pg_upmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) kfree(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) struct ceph_pg_mapping *pg =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) rb_entry(rb_first(&map->pg_upmap_items),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) struct ceph_pg_mapping, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) rb_erase(&pg->node, &map->pg_upmap_items);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) kfree(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) while (!RB_EMPTY_ROOT(&map->pg_pools)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) struct ceph_pg_pool_info *pi =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) rb_entry(rb_first(&map->pg_pools),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) struct ceph_pg_pool_info, node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) __remove_pg_pool(&map->pg_pools, pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) kvfree(map->osd_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) kvfree(map->osd_weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) kvfree(map->osd_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) kvfree(map->osd_primary_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) kfree(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) * Adjust max_osd value, (re)allocate arrays.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) * The new elements are properly initialized.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) static int osdmap_set_max_osd(struct ceph_osdmap *map, u32 max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) u32 *state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) u32 *weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) struct ceph_entity_addr *addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) u32 to_copy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) dout("%s old %u new %u\n", __func__, map->max_osd, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) if (max == map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) state = ceph_kvmalloc(array_size(max, sizeof(*state)), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) weight = ceph_kvmalloc(array_size(max, sizeof(*weight)), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) addr = ceph_kvmalloc(array_size(max, sizeof(*addr)), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) if (!state || !weight || !addr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) kvfree(state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) kvfree(weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) kvfree(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) to_copy = min(map->max_osd, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) if (map->osd_state) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) memcpy(state, map->osd_state, to_copy * sizeof(*state));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) memcpy(weight, map->osd_weight, to_copy * sizeof(*weight));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) memcpy(addr, map->osd_addr, to_copy * sizeof(*addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) kvfree(map->osd_state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) kvfree(map->osd_weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) kvfree(map->osd_addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) map->osd_state = state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) map->osd_weight = weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) map->osd_addr = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) for (i = map->max_osd; i < max; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) map->osd_state[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) map->osd_weight[i] = CEPH_OSD_OUT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) memset(map->osd_addr + i, 0, sizeof(*map->osd_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) if (map->osd_primary_affinity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) u32 *affinity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) affinity = ceph_kvmalloc(array_size(max, sizeof(*affinity)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) if (!affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) memcpy(affinity, map->osd_primary_affinity,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) to_copy * sizeof(*affinity));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) kvfree(map->osd_primary_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) map->osd_primary_affinity = affinity;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) for (i = map->max_osd; i < max; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) map->osd_primary_affinity[i] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) map->max_osd = max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244)
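/*
 * Install a freshly decoded crush map, replacing any existing one together
 * with its workspaces.  Takes ownership of @crush: an ERR_PTR is propagated
 * as-is and a real map is destroyed here on failure, so the caller never
 * needs to free it.
 */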
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) static int osdmap_set_crush(struct ceph_osdmap *map, struct crush_map *crush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) if (IS_ERR(crush))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) return PTR_ERR(crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) work = alloc_workspace(crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) if (!work) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) crush_destroy(crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) if (map->crush)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) crush_destroy(map->crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) cleanup_workspace_manager(&map->crush_wsm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) map->crush = crush;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) add_initial_workspace(&map->crush_wsm, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) #define OSDMAP_WRAPPER_COMPAT_VER 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) #define OSDMAP_CLIENT_DATA_COMPAT_VER 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) * Return 0 or error. On success, *v is set to 0 for old (v6) osdmaps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) * to struct_v of the client_data section for new (v7 and above)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) * osdmaps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) static int get_osdmap_client_data_v(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) const char *prefix, u8 *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) u8 struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) ceph_decode_8_safe(p, end, struct_v, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) if (struct_v >= 7) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) u8 struct_compat;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) ceph_decode_8_safe(p, end, struct_compat, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) if (struct_compat > OSDMAP_WRAPPER_COMPAT_VER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) pr_warn("got v %d cv %d > %d of %s ceph_osdmap\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) struct_v, struct_compat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) OSDMAP_WRAPPER_COMPAT_VER, prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) *p += 4; /* ignore wrapper struct_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) ceph_decode_8_safe(p, end, struct_v, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) ceph_decode_8_safe(p, end, struct_compat, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) if (struct_compat > OSDMAP_CLIENT_DATA_COMPAT_VER) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) pr_warn("got v %d cv %d > %d of %s ceph_osdmap client data\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) struct_v, struct_compat,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) OSDMAP_CLIENT_DATA_COMPAT_VER, prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) *p += 4; /* ignore client data struct_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) u16 version;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) *p -= 1; /* first byte was the low byte of the old le16 version */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) ceph_decode_16_safe(p, end, version, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) if (version < 6) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) pr_warn("got v %d < 6 of %s ceph_osdmap\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) version, prefix);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) /* old osdmap encoding */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) struct_v = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) *v = struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) static int __decode_pools(void **p, void *end, struct ceph_osdmap *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) bool incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) ceph_decode_32_safe(p, end, n, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) while (n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) u64 pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) ceph_decode_64_safe(p, end, pool, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) pi = lookup_pg_pool(&map->pg_pools, pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) if (!incremental || !pi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) pi = kzalloc(sizeof(*pi), GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) if (!pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) RB_CLEAR_NODE(&pi->node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) pi->id = pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) if (!__insert_pg_pool(&map->pg_pools, pi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) kfree(pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) return -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) ret = decode_pool(p, end, pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) static int decode_pools(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) return __decode_pools(p, end, map, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) static int decode_new_pools(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) return __decode_pools(p, end, map, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) typedef struct ceph_pg_mapping *(*decode_mapping_fn_t)(void **, void *, bool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373)
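/*
 * Decode one map<pg_t, T> section into @mapping_root.  Any existing entry
 * for a pgid is erased first; @fn then decodes the new value, returning
 * NULL to mean "removal only" (incremental maps) or an ERR_PTR on error.
 * A NULL @fn (old-style removal sections) just erases matching entries.
 */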
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) static int decode_pg_mapping(void **p, void *end, struct rb_root *mapping_root,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) decode_mapping_fn_t fn, bool incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) WARN_ON(!incremental && !fn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) ceph_decode_32_safe(p, end, n, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) while (n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) struct ceph_pg pgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) ret = ceph_decode_pgid(p, end, &pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) pg = lookup_pg_mapping(mapping_root, &pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) WARN_ON(!incremental);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) erase_pg_mapping(mapping_root, pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) free_pg_mapping(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) if (fn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) pg = fn(p, end, incremental);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) if (IS_ERR(pg))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) return PTR_ERR(pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) pg->pgid = pgid; /* struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) insert_pg_mapping(mapping_root, pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) static struct ceph_pg_mapping *__decode_pg_temp(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) bool incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) u32 len, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) if (len == 0 && incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) return NULL; /* new_pg_temp: [] to remove */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) if (len > (SIZE_MAX - sizeof(*pg)) / sizeof(u32))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) ceph_decode_need(p, end, len * sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) pg = alloc_pg_mapping(len * sizeof(u32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) if (!pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) pg->pg_temp.len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) for (i = 0; i < len; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) pg->pg_temp.osds[i] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) return pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) static int decode_pg_temp(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) static int decode_new_pg_temp(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) return decode_pg_mapping(p, end, &map->pg_temp, __decode_pg_temp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) static struct ceph_pg_mapping *__decode_primary_temp(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) bool incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) u32 osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) ceph_decode_32_safe(p, end, osd, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) if (osd == (u32)-1 && incremental)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) return NULL; /* new_primary_temp: -1 to remove */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) pg = alloc_pg_mapping(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) if (!pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) pg->primary_temp.osd = osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) return pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) static int decode_primary_temp(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) return decode_pg_mapping(p, end, &map->primary_temp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) __decode_primary_temp, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) static int decode_new_primary_temp(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) return decode_pg_mapping(p, end, &map->primary_temp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) __decode_primary_temp, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) if (!map->osd_primary_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) return map->osd_primary_affinity[osd];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) static int set_primary_affinity(struct ceph_osdmap *map, int osd, u32 aff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) if (!map->osd_primary_affinity) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) map->osd_primary_affinity = ceph_kvmalloc(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) array_size(map->max_osd, sizeof(*map->osd_primary_affinity)),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) GFP_NOFS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) if (!map->osd_primary_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) for (i = 0; i < map->max_osd; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) map->osd_primary_affinity[i] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) CEPH_OSD_DEFAULT_PRIMARY_AFFINITY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) map->osd_primary_affinity[osd] = aff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521)
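/*
 * A full map carries either an empty vector (primary-affinity not in use,
 * so drop any existing array) or exactly max_osd entries.
 */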
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) static int decode_primary_affinity(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) u32 len, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) if (len == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) kvfree(map->osd_primary_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) map->osd_primary_affinity = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) if (len != map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) ceph_decode_need(p, end, map->max_osd*sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) for (i = 0; i < map->max_osd; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) ret = set_primary_affinity(map, i, ceph_decode_32(p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) static int decode_new_primary_affinity(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) u32 n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) ceph_decode_32_safe(p, end, n, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) while (n--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) u32 osd, aff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) ceph_decode_32_safe(p, end, osd, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) ceph_decode_32_safe(p, end, aff, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) ret = set_primary_affinity(map, osd, aff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) pr_info("osd%d primary-affinity 0x%x\n", osd, aff);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) bool __unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) return __decode_pg_temp(p, end, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) bool __unused)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) u32 len, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) pg = alloc_pg_mapping(2 * len * sizeof(u32));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) if (!pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) pg->pg_upmap_items.len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) for (i = 0; i < len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) return pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) return ERR_PTR(-EINVAL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) return decode_pg_mapping(p, end, &map->pg_upmap_items,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) __decode_pg_upmap_items, false);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) static int decode_new_pg_upmap_items(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) return decode_pg_mapping(p, end, &map->pg_upmap_items,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) __decode_pg_upmap_items, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) static int decode_old_pg_upmap_items(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) * decode a full map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) u8 struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) u32 epoch = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) void *start = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) u32 max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) u32 len, i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) err = get_osdmap_client_data_v(p, end, "full", &struct_v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) /* fsid, epoch, created, modified */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) ceph_decode_need(p, end, sizeof(map->fsid) + sizeof(u32) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) sizeof(map->created) + sizeof(map->modified), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) ceph_decode_copy(p, &map->fsid, sizeof(map->fsid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) epoch = map->epoch = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) ceph_decode_copy(p, &map->created, sizeof(map->created));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) ceph_decode_copy(p, &map->modified, sizeof(map->modified));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) /* pools */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) err = decode_pools(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) /* pool_name */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) err = decode_pool_names(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) ceph_decode_32_safe(p, end, map->pool_max, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) ceph_decode_32_safe(p, end, map->flags, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) /* max_osd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) ceph_decode_32_safe(p, end, max, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) /* (re)alloc osd arrays */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) err = osdmap_set_max_osd(map, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) /* osd_state, osd_weight, osd_addrs->client_addr */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) ceph_decode_need(p, end, 3*sizeof(u32) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) map->max_osd*(struct_v >= 5 ? sizeof(u32) :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) sizeof(u8)) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) sizeof(*map->osd_weight), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) if (ceph_decode_32(p) != map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) if (struct_v >= 5) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) for (i = 0; i < map->max_osd; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) map->osd_state[i] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) for (i = 0; i < map->max_osd; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) map->osd_state[i] = ceph_decode_8(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) if (ceph_decode_32(p) != map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) for (i = 0; i < map->max_osd; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) map->osd_weight[i] = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) if (ceph_decode_32(p) != map->max_osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) for (i = 0; i < map->max_osd; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) err = ceph_decode_entity_addr(p, end, &map->osd_addr[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) /* pg_temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) err = decode_pg_temp(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) /* primary_temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) if (struct_v >= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) err = decode_primary_temp(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) /* primary_affinity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) if (struct_v >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) err = decode_primary_affinity(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) WARN_ON(map->osd_primary_affinity);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) /* crush */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) ceph_decode_32_safe(p, end, len, e_inval);
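/* clamp the crush blob to the buffer end so a bogus len can't overrun it */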
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) err = osdmap_set_crush(map, crush_decode(*p, min(*p + len, end)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) if (struct_v >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) /* erasure_code_profiles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) ceph_decode_skip_map_of_map(p, end, string, string, string,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) if (struct_v >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) err = decode_pg_upmap(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) err = decode_pg_upmap_items(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) /* ignore the rest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) *p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) dout("full osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) pr_err("corrupt full osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) err, epoch, (int)(*p - start), *p, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) print_hex_dump(KERN_DEBUG, "osdmap: ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) DUMP_PREFIX_OFFSET, 16, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) start, end - start, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) * Allocate and decode a full map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) struct ceph_osdmap *map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) map = ceph_osdmap_alloc();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) if (!map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) return ERR_PTR(-ENOMEM);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) ret = osdmap_decode(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) ceph_osdmap_destroy(map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) return ERR_PTR(ret);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) return map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) * Encoding order is (new_up_client, new_state, new_weight). Need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) * apply in the (new_weight, new_state, new_up_client) order, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) * an incremental map may look like e.g.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) * new_up_client: { osd=6, addr=... } # set osd_state and addr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) * new_state: { osd=6, xorstate=EXISTS } # clear osd_state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) static int decode_new_up_state_weight(void **p, void *end, u8 struct_v,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) void *new_up_client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) void *new_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) void *new_weight_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) u32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827)
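/*
 * First pass: validate and skip over new_up_client; it is applied
 * last, after new_weight and new_state below.
 */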
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) new_up_client = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) for (i = 0; i < len; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) struct ceph_entity_addr addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) ceph_decode_skip_32(p, end, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) if (ceph_decode_entity_addr(p, end, &addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837)
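/* likewise remember and skip new_state; it is applied after new_weight */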
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) new_state = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) len *= sizeof(u32) + (struct_v >= 5 ? sizeof(u32) : sizeof(u8));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) ceph_decode_need(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) /* new_weight */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) s32 osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) u32 w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) ceph_decode_need(p, end, 2*sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) osd = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) w = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) pr_info("osd%d weight 0x%x %s\n", osd, w,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) w == CEPH_OSD_IN ? "(in)" :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) (w == CEPH_OSD_OUT ? "(out)" : ""));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) map->osd_weight[osd] = w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) * If we are marking in, set the EXISTS bit and clear the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) * AUTOOUT and NEW bits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) if (w) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) map->osd_state[osd] |= CEPH_OSD_EXISTS;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) map->osd_state[osd] &= ~(CEPH_OSD_AUTOOUT |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) CEPH_OSD_NEW);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) }
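/* remember where decoding should resume once the skipped sections are applied */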
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) new_weight_end = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) /* new_state (up/down) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) *p = new_state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) s32 osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) u32 xorstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) osd = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) if (struct_v >= 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) xorstate = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) xorstate = ceph_decode_8(p);
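/* treat a zero xorstate as toggling the UP bit */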
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) if (xorstate == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) xorstate = CEPH_OSD_UP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) if ((map->osd_state[osd] & CEPH_OSD_UP) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) (xorstate & CEPH_OSD_UP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) pr_info("osd%d down\n", osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) if ((map->osd_state[osd] & CEPH_OSD_EXISTS) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) (xorstate & CEPH_OSD_EXISTS)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) pr_info("osd%d does not exist\n", osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) ret = set_primary_affinity(map, osd,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) CEPH_OSD_DEFAULT_PRIMARY_AFFINITY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) memset(map->osd_addr + osd, 0, sizeof(*map->osd_addr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) map->osd_state[osd] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) map->osd_state[osd] ^= xorstate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) /* new_up_client */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) *p = new_up_client;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) len = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) s32 osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) struct ceph_entity_addr addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) osd = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) BUG_ON(osd >= map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) if (ceph_decode_entity_addr(p, end, &addr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) goto e_inval;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) pr_info("osd%d up\n", osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) map->osd_state[osd] |= CEPH_OSD_EXISTS | CEPH_OSD_UP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) map->osd_addr[osd] = addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) *p = new_weight_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) * decode and apply an incremental map update.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) struct ceph_osdmap *map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) struct ceph_fsid fsid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) u32 epoch = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) struct ceph_timespec modified;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) s32 len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) u64 pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) __s64 new_pool_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) __s32 new_flags, max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) void *start = *p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) int err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) u8 struct_v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) dout("%s %p to %p len %d\n", __func__, *p, end, (int)(end - *p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) err = get_osdmap_client_data_v(p, end, "inc", &struct_v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) /* fsid, epoch, modified, new_pool_max, new_flags */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) ceph_decode_need(p, end, sizeof(fsid) + sizeof(u32) + sizeof(modified) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) sizeof(u64) + sizeof(u32), e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) ceph_decode_copy(p, &fsid, sizeof(fsid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) epoch = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) BUG_ON(epoch != map->epoch+1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) ceph_decode_copy(p, &modified, sizeof(modified));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) new_pool_max = ceph_decode_64(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) new_flags = ceph_decode_32(p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) /* full map? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) dout("apply_incremental full map len %d, %p to %p\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) len, *p, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) return ceph_osdmap_decode(p, min(*p+len, end));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) /* new crush? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) if (len > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) err = osdmap_set_crush(map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) crush_decode(*p, min(*p + len, end)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) *p += len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) /* new flags? */
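/* negative values mean these fields were not changed by this incremental */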
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) if (new_flags >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) map->flags = new_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) if (new_pool_max >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) map->pool_max = new_pool_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) /* new max? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) ceph_decode_32_safe(p, end, max, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) if (max >= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) err = osdmap_set_max_osd(map, max);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) map->epoch++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) map->modified = modified;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) /* new_pools */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) err = decode_new_pools(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) /* new_pool_names */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) err = decode_pool_names(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) /* old_pool */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) ceph_decode_32_safe(p, end, len, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) while (len--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) ceph_decode_64_safe(p, end, pool, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) pi = lookup_pg_pool(&map->pg_pools, pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) if (pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) __remove_pg_pool(&map->pg_pools, pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) /* new_up_client, new_state, new_weight */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) err = decode_new_up_state_weight(p, end, struct_v, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) /* new_pg_temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) err = decode_new_pg_temp(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) /* new_primary_temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) if (struct_v >= 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) err = decode_new_primary_temp(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) /* new_primary_affinity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) if (struct_v >= 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) err = decode_new_primary_affinity(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) if (struct_v >= 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) /* new_erasure_code_profiles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) ceph_decode_skip_map_of_map(p, end, string, string, string,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) /* old_erasure_code_profiles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) ceph_decode_skip_set(p, end, string, e_inval);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) if (struct_v >= 4) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) err = decode_new_pg_upmap(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) err = decode_old_pg_upmap(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) err = decode_new_pg_upmap_items(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) err = decode_old_pg_upmap_items(p, end, map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) goto bad;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) /* ignore the rest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) *p = end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) dout("inc osdmap epoch %d max_osd %d\n", map->epoch, map->max_osd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) return map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) e_inval:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) err = -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) bad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) pr_err("corrupt inc osdmap (%d) epoch %d off %d (%p of %p-%p)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) err, epoch, (int)(*p - start), *p, start, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) print_hex_dump(KERN_DEBUG, "osdmap: ",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) DUMP_PREFIX_OFFSET, 16, 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) start, end - start, true);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) return ERR_PTR(err);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) void ceph_oloc_copy(struct ceph_object_locator *dest,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) const struct ceph_object_locator *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) ceph_oloc_destroy(dest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) dest->pool = src->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) if (src->pool_ns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) dest->pool_ns = ceph_get_string(src->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) dest->pool_ns = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) EXPORT_SYMBOL(ceph_oloc_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) void ceph_oloc_destroy(struct ceph_object_locator *oloc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) ceph_put_string(oloc->pool_ns);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) EXPORT_SYMBOL(ceph_oloc_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) void ceph_oid_copy(struct ceph_object_id *dest,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) const struct ceph_object_id *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) ceph_oid_destroy(dest);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) if (src->name != src->inline_name) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) /* very rare, see ceph_object_id definition */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) dest->name = kmalloc(src->name_len + 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) GFP_NOIO | __GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) dest->name = dest->inline_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) memcpy(dest->name, src->name, src->name_len + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) dest->name_len = src->name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) EXPORT_SYMBOL(ceph_oid_copy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118)
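/*
 * Format the name into the inline buffer.  Returns 0 on success, or
 * the would-be name length if it doesn't fit.
 */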
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) static __printf(2, 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) int oid_printf_vargs(struct ceph_object_id *oid, const char *fmt, va_list ap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) WARN_ON(!ceph_oid_empty(oid));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) len = vsnprintf(oid->inline_name, sizeof(oid->inline_name), fmt, ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) if (len >= sizeof(oid->inline_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) return len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) oid->name_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) * If the oid doesn't fit into the inline buffer, BUG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) void ceph_oid_printf(struct ceph_object_id *oid, const char *fmt, ...)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) va_list ap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) va_start(ap, fmt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) BUG_ON(oid_printf_vargs(oid, fmt, ap));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) va_end(ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) EXPORT_SYMBOL(ceph_oid_printf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) static __printf(3, 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) int oid_aprintf_vargs(struct ceph_object_id *oid, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) const char *fmt, va_list ap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) va_list aq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) va_copy(aq, ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) len = oid_printf_vargs(oid, fmt, aq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) va_end(aq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) if (len) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) char *external_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) external_name = kmalloc(len + 1, gfp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) if (!external_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) oid->name = external_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) WARN_ON(vsnprintf(oid->name, len + 1, fmt, ap) != len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) oid->name_len = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) * If the oid doesn't fit into the inline buffer, allocate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) int ceph_oid_aprintf(struct ceph_object_id *oid, gfp_t gfp,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) const char *fmt, ...)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) va_list ap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) va_start(ap, fmt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) ret = oid_aprintf_vargs(oid, gfp, fmt, ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) va_end(ap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) EXPORT_SYMBOL(ceph_oid_aprintf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) void ceph_oid_destroy(struct ceph_object_id *oid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) if (oid->name != oid->inline_name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) kfree(oid->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) EXPORT_SYMBOL(ceph_oid_destroy);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) * osds only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) static bool __osds_equal(const struct ceph_osds *lhs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) const struct ceph_osds *rhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) if (lhs->size == rhs->size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) !memcmp(lhs->osds, rhs->osds, rhs->size * sizeof(rhs->osds[0])))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) * osds + primary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) static bool osds_equal(const struct ceph_osds *lhs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) const struct ceph_osds *rhs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) if (__osds_equal(lhs, rhs) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) lhs->primary == rhs->primary)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) static bool osds_valid(const struct ceph_osds *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) /* non-empty set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) if (set->size > 0 && set->primary >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) /* empty can_shift_osds set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) if (!set->size && set->primary == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) /* empty !can_shift_osds set - all NONE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) if (set->size > 0 && set->primary == -1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) for (i = 0; i < set->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) if (set->osds[i] != CRUSH_ITEM_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) if (i == set->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) void ceph_osds_copy(struct ceph_osds *dest, const struct ceph_osds *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) memcpy(dest->osds, src->osds, src->size * sizeof(src->osds[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) dest->size = src->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) dest->primary = src->primary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) bool ceph_pg_is_split(const struct ceph_pg *pgid, u32 old_pg_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) u32 new_pg_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) int old_bits = calc_bits_of(old_pg_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) int old_mask = (1 << old_bits) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) int n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) WARN_ON(pgid->seed >= old_pg_num);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) if (new_pg_num <= old_pg_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265)
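/*
 * Check whether any child PG in [old_pg_num, new_pg_num) folds back
 * onto this seed under stable_mod, i.e. whether this PG gains
 * children from the split.  E.g. for pg_num 4 -> 8, seed 1 gains
 * child 5 because 5 folds back to 1 (ceph_stable_mod(5, 4, 7) == 1).
 */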
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) for (n = 1; ; n++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) int next_bit = n << (old_bits - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) u32 s = next_bit | pgid->seed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) if (s < old_pg_num || s == pgid->seed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) if (s >= new_pg_num)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) s = ceph_stable_mod(s, old_pg_num, old_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) if (s == pgid->seed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) bool ceph_is_new_interval(const struct ceph_osds *old_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) const struct ceph_osds *new_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) const struct ceph_osds *old_up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) const struct ceph_osds *new_up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) int old_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) int new_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) int old_min_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) int new_min_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) u32 old_pg_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) u32 new_pg_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) bool old_sort_bitwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) bool new_sort_bitwise,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) bool old_recovery_deletes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) bool new_recovery_deletes,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) const struct ceph_pg *pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) return !osds_equal(old_acting, new_acting) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) !osds_equal(old_up, new_up) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) old_size != new_size ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) old_min_size != new_min_size ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) old_sort_bitwise != new_sort_bitwise ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) old_recovery_deletes != new_recovery_deletes;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307)
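/* position of @osd in the acting set, or -1 if it is not in the set */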
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) static int calc_pg_rank(int osd, const struct ceph_osds *acting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) for (i = 0; i < acting->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) if (acting->osds[i] == osd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) static bool primary_changed(const struct ceph_osds *old_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) const struct ceph_osds *new_acting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) if (!old_acting->size && !new_acting->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) return false; /* both still empty */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) if (!old_acting->size ^ !new_acting->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) return true; /* was empty, now not, or vice versa */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) if (old_acting->primary != new_acting->primary)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) return true; /* primary changed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) if (calc_pg_rank(old_acting->primary, old_acting) !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) calc_pg_rank(new_acting->primary, new_acting))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) return false; /* same primary (though replicas may have changed) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) bool ceph_osds_changed(const struct ceph_osds *old_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) const struct ceph_osds *new_acting,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) bool any_change)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) if (primary_changed(old_acting, new_acting))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) if (any_change && !__osds_equal(old_acting, new_acting))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) * Map an object into a PG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) * Should only be called with target_oid and target_oloc (as opposed to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) * base_oid and base_oloc), since tiering isn't taken into account.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) void __ceph_object_locator_to_pg(struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) const struct ceph_object_id *oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) const struct ceph_object_locator *oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) struct ceph_pg *raw_pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) WARN_ON(pi->id != oloc->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) if (!oloc->pool_ns) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) raw_pgid->pool = oloc->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) raw_pgid->seed = ceph_str_hash(pi->object_hash, oid->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) oid->name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) dout("%s %s -> raw_pgid %llu.%x\n", __func__, oid->name,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) raw_pgid->pool, raw_pgid->seed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) } else {
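/*
 * Hash the namespace and object name joined by a 0x1f separator so
 * that namespaced objects hash differently from plain ones.
 */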
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) char stack_buf[256];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) char *buf = stack_buf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) int nsl = oloc->pool_ns->len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) size_t total = nsl + 1 + oid->name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) if (total > sizeof(stack_buf))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) buf = kmalloc(total, GFP_NOIO | __GFP_NOFAIL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) memcpy(buf, oloc->pool_ns->str, nsl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) buf[nsl] = '\037';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) memcpy(buf + nsl + 1, oid->name, oid->name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) raw_pgid->pool = oloc->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) raw_pgid->seed = ceph_str_hash(pi->object_hash, buf, total);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) if (buf != stack_buf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) kfree(buf);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) dout("%s %s ns %.*s -> raw_pgid %llu.%x\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) oid->name, nsl, oloc->pool_ns->str,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) raw_pgid->pool, raw_pgid->seed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) int ceph_object_locator_to_pg(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) const struct ceph_object_id *oid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) const struct ceph_object_locator *oloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) struct ceph_pg *raw_pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) pi = ceph_pg_pool_by_id(osdmap, oloc->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) if (!pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) __ceph_object_locator_to_pg(pi, oid, oloc, raw_pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) EXPORT_SYMBOL(ceph_object_locator_to_pg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) * Map a raw PG (full precision ps) into an actual PG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) static void raw_pg_to_pg(struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) const struct ceph_pg *raw_pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) struct ceph_pg *pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) pgid->pool = raw_pgid->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) pgid->seed = ceph_stable_mod(raw_pgid->seed, pi->pg_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) pi->pg_num_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) * Map a raw PG (full precision ps) into a placement ps (placement
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) * seed). Include pool id in that value so that different pools don't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) * use the same seeds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) const struct ceph_pg *raw_pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) if (pi->flags & CEPH_POOL_FLAG_HASHPSPOOL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) /* hash pool id and seed so that pool PGs do not overlap */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) return crush_hash32_2(CRUSH_HASH_RJENKINS1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) ceph_stable_mod(raw_pgid->seed,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) pi->pgp_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) pi->pgp_num_mask),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) raw_pgid->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) * legacy behavior: add ps and pool together. this is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) * not a great approach because the PGs from each pool
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) * will overlap on top of each other: 0.5 == 1.4 ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) * 2.3 == ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) return ceph_stable_mod(raw_pgid->seed, pi->pgp_num,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) pi->pgp_num_mask) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) (unsigned)raw_pgid->pool;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) * Magic value used for a "default" fallback choose_args, used if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) * crush_choose_arg_map passed to do_crush() does not exist. If this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) * also doesn't exist, fall back to canonical weights.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) #define CEPH_DEFAULT_CHOOSE_ARGS -1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) int *result, int result_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) const __u32 *weight, int weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) s64 choose_args_index)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) struct crush_choose_arg_map *arg_map;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) struct crush_work *work;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) BUG_ON(result_max > CEPH_PG_MAX_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) arg_map = lookup_choose_arg_map(&map->crush->choose_args,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) choose_args_index);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) if (!arg_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) arg_map = lookup_choose_arg_map(&map->crush->choose_args,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) CEPH_DEFAULT_CHOOSE_ARGS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471)
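/* crush_do_rule() needs scratch space; borrow a crush_work from the map's workspace manager */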
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) work = get_workspace(&map->crush_wsm, map->crush);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) r = crush_do_rule(map->crush, ruleno, x, result, result_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) weight, weight_max, work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) arg_map ? arg_map->args : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) put_workspace(&map->crush_wsm, work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) static void remove_nonexistent_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) struct ceph_osds *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) if (ceph_can_shift_osds(pi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) int removed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) /* shift left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) for (i = 0; i < set->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) if (!ceph_osd_exists(osdmap, set->osds[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) removed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) if (removed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) set->osds[i - removed] = set->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) set->size -= removed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) /* set nonexistent devices to NONE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) for (i = 0; i < set->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) if (!ceph_osd_exists(osdmap, set->osds[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) set->osds[i] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) * Calculate raw set (CRUSH output) for given PG and filter out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) * nonexistent OSDs. ->primary is undefined for a raw set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) * Placement seed (CRUSH input) is returned through @ppps.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) static void pg_to_raw_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) const struct ceph_pg *raw_pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) struct ceph_osds *raw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) u32 *ppps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) u32 pps = raw_pg_to_pps(pi, raw_pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) int ruleno;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) int len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) ceph_osds_init(raw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) if (ppps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) *ppps = pps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) ruleno = crush_find_rule(osdmap->crush, pi->crush_ruleset, pi->type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) pi->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) if (ruleno < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) pr_err("no crush rule: pool %lld ruleset %d type %d size %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) pi->id, pi->crush_ruleset, pi->type, pi->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) if (pi->size > ARRAY_SIZE(raw->osds)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) pr_err_ratelimited("pool %lld ruleset %d type %d too wide: size %d > %zu\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) pi->id, pi->crush_ruleset, pi->type, pi->size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) ARRAY_SIZE(raw->osds));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) len = do_crush(osdmap, ruleno, pps, raw->osds, pi->size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) osdmap->osd_weight, osdmap->max_osd, pi->id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) if (len < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) pr_err("error %d from crush rule %d: pool %lld ruleset %d type %d size %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) len, ruleno, pi->id, pi->crush_ruleset, pi->type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) pi->size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) raw->size = len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) remove_nonexistent_osds(osdmap, pi, raw);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) }
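
/*
 * For orientation, the full mapping pipeline (see
 * ceph_pg_to_up_acting_osds() below) runs, in order:
 *
 *	pg_to_raw_osds()	 - run CRUSH, drop nonexistent OSDs
 *	apply_upmap()		 - apply pg_upmap[_items] overrides
 *	raw_to_up_osds()	 - drop/mask down OSDs, pick up primary
 *	apply_primary_affinity() - possibly choose a different primary
 *	get_temp_osds()		 - pg_temp/primary_temp for the acting set
 *
 * If CRUSH can't be run (no rule, pool too wide, mapping error) the
 * raw set is left empty with ->primary == -1.
 */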
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) /* apply pg_upmap[_items] mappings */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) static void apply_upmap(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) const struct ceph_pg *pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) struct ceph_osds *raw)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) pg = lookup_pg_mapping(&osdmap->pg_upmap, pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) /* make sure targets aren't marked out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) for (i = 0; i < pg->pg_upmap.len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) int osd = pg->pg_upmap.osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) if (osd != CRUSH_ITEM_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) osd < osdmap->max_osd &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) osdmap->osd_weight[osd] == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) /* reject/ignore explicit mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) for (i = 0; i < pg->pg_upmap.len; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) raw->osds[i] = pg->pg_upmap.osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) raw->size = pg->pg_upmap.len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) /* check and apply pg_upmap_items, if any */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) * Note: this approach does not allow a bidirectional swap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) for (i = 0; i < pg->pg_upmap_items.len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) int from = pg->pg_upmap_items.from_to[i][0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) int to = pg->pg_upmap_items.from_to[i][1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) int pos = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) bool exists = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) /* make sure replacement doesn't already appear */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) for (j = 0; j < raw->size; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) int osd = raw->osds[j];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) if (osd == to) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) exists = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) /* ignore mapping if target is marked out */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) if (osd == from && pos < 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) !(to != CRUSH_ITEM_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) to < osdmap->max_osd &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) osdmap->osd_weight[to] == 0)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) pos = j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) if (!exists && pos >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) raw->osds[pos] = to;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) }
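
/*
 * Worked example (hypothetical values): with a raw set of [1, 2, 3],
 * a pg_upmap_items entry [[3, 7]] rewrites it to [1, 2, 7], provided
 * osd.7 is not marked out and not already in the set; an entry
 * [[3, 2]] is a no-op because osd.2 is already present.  A pg_upmap
 * entry [4, 5, 6] replaces the whole set, but only if none of its
 * targets are marked out.
 */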
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) * Given raw set, calculate up set and up primary. By definition of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) * up set, the result won't contain nonexistent or down OSDs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) * This is done in-place - on return @set is the up set. If it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) * empty, ->primary will remain undefined.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) static void raw_to_up_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) struct ceph_osds *set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) /* ->primary is undefined for a raw set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) BUG_ON(set->primary != -1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) if (ceph_can_shift_osds(pi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) int removed = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) /* shift left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) for (i = 0; i < set->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) if (ceph_osd_is_down(osdmap, set->osds[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) removed++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) if (removed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) set->osds[i - removed] = set->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) set->size -= removed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) if (set->size > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) set->primary = set->osds[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) /* set down or nonexistent (dne) devices to NONE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) for (i = set->size - 1; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) if (ceph_osd_is_down(osdmap, set->osds[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) set->osds[i] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) set->primary = set->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) }
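
/*
 * Worked example (hypothetical values): given the raw set [1, 2, 3]
 * with osd.2 down, a replicated pool yields the up set [1, 3] with
 * ->primary == 1, while an erasure-coded pool yields
 * [1, CRUSH_ITEM_NONE, 3], also with ->primary == 1 (the lowest slot
 * that holds an up OSD).
 */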
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) static void apply_primary_affinity(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) u32 pps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) struct ceph_osds *up)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) int pos = -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) * Do we have any non-default primary_affinity values for these
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) * osds?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) if (!osdmap->osd_primary_affinity)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) for (i = 0; i < up->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) int osd = up->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) if (osd != CRUSH_ITEM_NONE &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) osdmap->osd_primary_affinity[osd] !=
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) CEPH_OSD_DEFAULT_PRIMARY_AFFINITY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) if (i == up->size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) * Pick the primary. Feed both the seed (for the pg) and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) * osd into the hash/rng so that a proportional fraction of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) * osd's pgs get rejected as primary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) for (i = 0; i < up->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) int osd = up->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) u32 aff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) if (osd == CRUSH_ITEM_NONE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) aff = osdmap->osd_primary_affinity[osd];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) if (aff < CEPH_OSD_MAX_PRIMARY_AFFINITY &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) (crush_hash32_2(CRUSH_HASH_RJENKINS1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) pps, osd) >> 16) >= aff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) * We chose not to use this primary. Note it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) * anyway as a fallback in case we don't pick
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) * anyone else, but keep looking.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) if (pos < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) pos = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) pos = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) if (pos < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) up->primary = up->osds[pos];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) if (ceph_can_shift_osds(pi) && pos > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) /* move the new primary to the front */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) for (i = pos; i > 0; i--)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) up->osds[i] = up->osds[i - 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) up->osds[0] = up->primary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) }
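
/*
 * Example (illustrative numbers, assuming the usual
 * CEPH_OSD_MAX_PRIMARY_AFFINITY of 0x10000): an OSD with primary
 * affinity 0x8000 (0.5) fails the (hash >> 16) < aff test for about
 * half of the placement seeds, so it is passed over as primary for
 * roughly half of its PGs.  The first OSD in the up set that passes
 * the test becomes the primary; if none do, the first candidate that
 * was passed over is used as the fallback.
 */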
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) * Get pg_temp and primary_temp mappings for given PG.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) * Note that a PG may have none, only pg_temp, only primary_temp or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) * both pg_temp and primary_temp mappings. This means @temp isn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) * always a valid OSD set on return: in the "only primary_temp" case,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) * @temp will have its ->primary >= 0 but ->size == 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) static void get_temp_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) const struct ceph_pg *pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) struct ceph_osds *temp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) struct ceph_pg_mapping *pg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) ceph_osds_init(temp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) /* pg_temp? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) pg = lookup_pg_mapping(&osdmap->pg_temp, pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) if (pg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) for (i = 0; i < pg->pg_temp.len; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) if (ceph_osd_is_down(osdmap, pg->pg_temp.osds[i])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) if (ceph_can_shift_osds(pi))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) temp->osds[temp->size++] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) temp->osds[temp->size++] = pg->pg_temp.osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) /* apply pg_temp's primary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) for (i = 0; i < temp->size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) if (temp->osds[i] != CRUSH_ITEM_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) temp->primary = temp->osds[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) /* primary_temp? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) pg = lookup_pg_mapping(&osdmap->primary_temp, pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) if (pg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) temp->primary = pg->primary_temp.osd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) }
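
/*
 * Worked example (hypothetical values): a pg_temp mapping of
 * [4, 5, 6] with osd.5 down becomes [4, 6] (primary 4) for a
 * replicated pool or [4, CRUSH_ITEM_NONE, 6] (primary 4) for an
 * erasure-coded pool.  A primary_temp mapping of osd.6 for the same
 * PG then overrides ->primary to 6, and may exist without any
 * pg_temp mapping at all, leaving ->size == 0.
 */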
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) * Map a PG to its acting set as well as its up set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) * Acting set is used for data mapping purposes, while up set can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) * recorded for detecting interval changes and deciding whether to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) * resend a request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) void ceph_pg_to_up_acting_osds(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) const struct ceph_pg *raw_pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) struct ceph_osds *up,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) struct ceph_osds *acting)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) struct ceph_pg pgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) u32 pps;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) WARN_ON(pi->id != raw_pgid->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) raw_pg_to_pg(pi, raw_pgid, &pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) pg_to_raw_osds(osdmap, pi, raw_pgid, up, &pps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) apply_upmap(osdmap, &pgid, up);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) raw_to_up_osds(osdmap, pi, up);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) apply_primary_affinity(osdmap, pi, pps, up);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) get_temp_osds(osdmap, pi, &pgid, acting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) if (!acting->size) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) memcpy(acting->osds, up->osds, up->size * sizeof(up->osds[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) acting->size = up->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) if (acting->primary == -1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) acting->primary = up->primary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) WARN_ON(!osds_valid(up) || !osds_valid(acting));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) }
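
/*
 * Caller sketch (illustrative only, loosely modelled on the users in
 * net/ceph/osd_client.c):
 *
 *	struct ceph_osds up, acting;
 *
 *	ceph_pg_to_up_acting_osds(osdmap, pi, &raw_pgid, &up, &acting);
 *	if (acting.primary == -1)
 *		...		(no acting primary, e.g. all OSDs down)
 *
 * With no pg_temp/primary_temp overrides the acting set is simply a
 * copy of the up set.
 */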
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) struct ceph_pg_pool_info *pi,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) const struct ceph_pg *raw_pgid,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) struct ceph_spg *spgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) struct ceph_pg pgid;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) struct ceph_osds up, acting;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) WARN_ON(pi->id != raw_pgid->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) raw_pg_to_pg(pi, raw_pgid, &pgid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) if (ceph_can_shift_osds(pi)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) spgid->pgid = pgid; /* struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) spgid->shard = CEPH_SPG_NOSHARD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) ceph_pg_to_up_acting_osds(osdmap, pi, &pgid, &up, &acting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) for (i = 0; i < acting.size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) if (acting.osds[i] == acting.primary) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) spgid->pgid = pgid; /* struct */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) spgid->shard = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) }
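
/*
 * Example (hypothetical values): for an erasure-coded pool whose
 * acting set is [6, 2, 9] with acting primary osd.2, the primary
 * shard is 1.  Replicated pools always report CEPH_SPG_NOSHARD.
 */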
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) * Return acting primary for given PG, or -1 if none.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) const struct ceph_pg *raw_pgid)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) struct ceph_pg_pool_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) struct ceph_osds up, acting;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) pi = ceph_pg_pool_by_id(osdmap, raw_pgid->pool);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) if (!pi)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) ceph_pg_to_up_acting_osds(osdmap, pi, raw_pgid, &up, &acting);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) return acting.primary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) EXPORT_SYMBOL(ceph_pg_to_acting_primary);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) static struct crush_loc_node *alloc_crush_loc(size_t type_name_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) size_t name_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) struct crush_loc_node *loc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) loc = kmalloc(sizeof(*loc) + type_name_len + name_len + 2, GFP_NOIO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) if (!loc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) RB_CLEAR_NODE(&loc->cl_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865) return loc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) static void free_crush_loc(struct crush_loc_node *loc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) WARN_ON(!RB_EMPTY_NODE(&loc->cl_node));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) kfree(loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) static int crush_loc_compare(const struct crush_loc *loc1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) const struct crush_loc *loc2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) return strcmp(loc1->cl_type_name, loc2->cl_type_name) ?:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) strcmp(loc1->cl_name, loc2->cl_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) DEFINE_RB_FUNCS2(crush_loc, struct crush_loc_node, cl_loc, crush_loc_compare,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) RB_BYPTR, const struct crush_loc *, cl_node)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) * Parses a set of <bucket type name>':'<bucket name> pairs separated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) * by '|', e.g. "rack:foo1|rack:foo2|datacenter:bar".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) * Note that @crush_location is modified by strsep().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) int ceph_parse_crush_location(char *crush_location, struct rb_root *locs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) struct crush_loc_node *loc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) const char *type_name, *name, *colon;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) size_t type_name_len, name_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) dout("%s '%s'\n", __func__, crush_location);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) while ((type_name = strsep(&crush_location, "|"))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) colon = strchr(type_name, ':');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) if (!colon)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) type_name_len = colon - type_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) if (type_name_len == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907) name = colon + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) name_len = strlen(name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) if (name_len == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) loc = alloc_crush_loc(type_name_len, name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) if (!loc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) loc->cl_loc.cl_type_name = loc->cl_data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) memcpy(loc->cl_loc.cl_type_name, type_name, type_name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) loc->cl_loc.cl_type_name[type_name_len] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) loc->cl_loc.cl_name = loc->cl_data + type_name_len + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) memcpy(loc->cl_loc.cl_name, name, name_len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) loc->cl_loc.cl_name[name_len] = '\0';
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) if (!__insert_crush_loc(locs, loc)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) free_crush_loc(loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) return -EEXIST;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) dout("%s type_name '%s' name '%s'\n", __func__,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) loc->cl_loc.cl_type_name, loc->cl_loc.cl_name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) }
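
/*
 * Usage sketch (illustrative, not lifted from an in-tree caller):
 *
 *	struct rb_root locs = RB_ROOT;
 *	char spec[] = "host:node1|rack:foo1";
 *	int err;
 *
 *	err = ceph_parse_crush_location(spec, &locs);
 *	...
 *	ceph_clear_crush_locs(&locs);
 *
 * The string must be writable because strsep() splits it in place,
 * and a duplicate type:name pair makes the parse fail with -EEXIST.
 */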
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) struct rb_node *n1 = rb_first(locs1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939) struct rb_node *n2 = rb_first(locs2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) for ( ; n1 && n2; n1 = rb_next(n1), n2 = rb_next(n2)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) struct crush_loc_node *loc1 =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) rb_entry(n1, struct crush_loc_node, cl_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) struct crush_loc_node *loc2 =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) rb_entry(n2, struct crush_loc_node, cl_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) ret = crush_loc_compare(&loc1->cl_loc, &loc2->cl_loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) if (!n1 && n2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) if (n1 && !n2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) void ceph_clear_crush_locs(struct rb_root *locs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) while (!RB_EMPTY_ROOT(locs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) struct crush_loc_node *loc =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) rb_entry(rb_first(locs), struct crush_loc_node, cl_node);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) erase_crush_loc(locs, loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) free_crush_loc(loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) * [a-zA-Z0-9-_.]+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974) static bool is_valid_crush_name(const char *name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) if (!('a' <= *name && *name <= 'z') &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978) !('A' <= *name && *name <= 'Z') &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) !('0' <= *name && *name <= '9') &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) *name != '-' && *name != '_' && *name != '.')
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) } while (*++name != '\0');
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2985) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2987) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2988) * Gets the parent of an item. Returns its id (<0 because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2989) * parent is always a bucket), type id (>0 for the same reason,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2990) * via @parent_type_id) and location (via @parent_loc). If no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2991) * parent, returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2992) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2993) * Does a linear search, as there are no parent pointers of any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2994) * kind. Note that the result is ambiguous for items that occur
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2995) * multiple times in the map.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2996) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2997) static int get_immediate_parent(struct crush_map *c, int id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2998) u16 *parent_type_id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2999) struct crush_loc *parent_loc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3000) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3001) struct crush_bucket *b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3002) struct crush_name_node *type_cn, *cn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3003) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3005) for (i = 0; i < c->max_buckets; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3006) b = c->buckets[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3007) if (!b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3008) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3010) /* ignore per-class shadow hierarchy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3011) cn = lookup_crush_name(&c->names, b->id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3012) if (!cn || !is_valid_crush_name(cn->cn_name))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3013) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3014)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3015) for (j = 0; j < b->size; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3016) if (b->items[j] != id)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3017) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3018)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3019) *parent_type_id = b->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3020) type_cn = lookup_crush_name(&c->type_names, b->type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3021) parent_loc->cl_type_name = type_cn->cn_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3022) parent_loc->cl_name = cn->cn_name;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3023) return b->id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3024) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3025) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3026)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3027) return 0; /* no parent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3028) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3029)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3030) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3031) * Calculates the locality/distance from an item to a client
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3032) * location expressed in terms of CRUSH hierarchy as a set of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3033) * (bucket type name, bucket name) pairs. Specifically, looks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3034) * for the lowest-valued bucket type for which the location of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3035) * @id matches one of the locations in @locs, so for standard
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3036) * bucket types (host = 1, rack = 3, datacenter = 8, zone = 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3037) * a matching host is closer than a matching rack and a matching
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3038) * data center is closer than a matching zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3039) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3040) * Specifying multiple locations (a "multipath" location) such
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3041) * as "rack=foo1 rack=foo2 datacenter=bar" is allowed -- @locs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3042) * is a multimap. The locality will be:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3043) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3044) * - 3 for OSDs in racks foo1 and foo2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3045) * - 8 for OSDs in data center bar
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3046) * - -1 for all other OSDs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3047) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3048) * The lowest possible bucket type is 1, so the best locality
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3049) * for an OSD is 1 (i.e. a matching host). Locality 0 would be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3050) * the OSD itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3051) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3052) int ceph_get_crush_locality(struct ceph_osdmap *osdmap, int id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3053) struct rb_root *locs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3054) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3055) struct crush_loc loc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3056) u16 type_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3058) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3059) * Instead of repeated get_immediate_parent() calls,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3060) * the location of @id could be obtained with a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3061) * depth-first traversal.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3062) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3063) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3064) id = get_immediate_parent(osdmap->crush, id, &type_id, &loc);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3065) if (id >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3066) return -1; /* not local */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3067)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3068) if (lookup_crush_loc(locs, &loc))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3069) return type_id;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3070) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3071) }
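
/*
 * Example (hypothetical hierarchy): with a crush_location of
 * "rack:foo1|datacenter:bar", an OSD whose ancestry is
 * host=baz -> rack=foo1 -> datacenter=bar returns 3 (the rack type
 * id), an OSD elsewhere in datacenter bar returns 8, and an OSD
 * outside bar returns -1.
 */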