^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Ceph - scalable distributed file system
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2015 Intel Corporation All Rights Reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * This is free software; you can redistribute it and/or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * modify it under the terms of the GNU Lesser General Public
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * License version 2.1, as published by the Free Software
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Foundation. See file COPYING.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #ifdef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) # include <linux/string.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) # include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) # include <linux/bug.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) # include <linux/kernel.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) # include <linux/crush/crush.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) # include <linux/crush/hash.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) # include <linux/crush/mapper.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) # include "crush_compat.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) # include "crush.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) # include "hash.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) # include "mapper.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include "crush_ln_table.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define dprintk(args...) /* printf(args) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * Implement the core CRUSH mapping algorithm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * crush_find_rule - find a crush_rule id for a given ruleset, type, and size.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * @map: the crush_map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * @ruleset: the storage ruleset id (user defined)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * @type: storage ruleset type (user defined)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * @size: output set size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) int crush_find_rule(const struct crush_map *map, int ruleset, int type, int size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) __u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) for (i = 0; i < map->max_rules; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) if (map->rules[i] &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) map->rules[i]->mask.ruleset == ruleset &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) map->rules[i]->mask.type == type &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) map->rules[i]->mask.min_size <= size &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) map->rules[i]->mask.max_size >= size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * bucket choose methods
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) * For each bucket algorithm, we have a "choose" method that, given a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) * crush input @x and replica position (usually, position in output set) @r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) * will produce an item in the bucket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) * Choose based on a random permutation of the bucket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * We used to use some prime number arithmetic to do this, but it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * wasn't very random, and had some other bad behaviors. Instead, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * calculate an actual random permutation of the bucket members.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * Since this is expensive, we optimize for the r=0 case, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * captures the vast majority of calls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) static int bucket_perm_choose(const struct crush_bucket *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) struct crush_work_bucket *work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) int x, int r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) unsigned int pr = r % bucket->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) unsigned int i, s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) /* start a new permutation if @x has changed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) if (work->perm_x != (__u32)x || work->perm_n == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) dprintk("bucket %d new x=%d\n", bucket->id, x);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) work->perm_x = x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) /* optimize common r=0 case */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) if (pr == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) s = crush_hash32_3(bucket->hash, x, bucket->id, 0) %
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) bucket->size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) work->perm[0] = s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) work->perm_n = 0xffff; /* magic value, see below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) for (i = 0; i < bucket->size; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) work->perm[i] = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) work->perm_n = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) } else if (work->perm_n == 0xffff) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) /* clean up after the r=0 case above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) for (i = 1; i < bucket->size; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) work->perm[i] = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) work->perm[work->perm[0]] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) work->perm_n = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) /* calculate permutation up to pr */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) for (i = 0; i < work->perm_n; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) dprintk(" perm_choose have %d: %d\n", i, work->perm[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) while (work->perm_n <= pr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) unsigned int p = work->perm_n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) /* no point in swapping the final entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) if (p < bucket->size - 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) i = crush_hash32_3(bucket->hash, x, bucket->id, p) %
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) (bucket->size - p);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) if (i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) unsigned int t = work->perm[p + i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) work->perm[p + i] = work->perm[p];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) work->perm[p] = t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) dprintk(" perm_choose swap %d with %d\n", p, p+i);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) work->perm_n++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) for (i = 0; i < bucket->size; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) dprintk(" perm_choose %d: %d\n", i, work->perm[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) s = work->perm[pr];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) dprintk(" perm_choose %d sz=%d x=%d r=%d (%d) s=%d\n", bucket->id,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) bucket->size, x, r, pr, s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) return bucket->items[s];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) /* uniform */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) static int bucket_uniform_choose(const struct crush_bucket_uniform *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) struct crush_work_bucket *work, int x, int r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) return bucket_perm_choose(&bucket->h, work, x, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) /* list */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) static int bucket_list_choose(const struct crush_bucket_list *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) int x, int r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) for (i = bucket->h.size-1; i >= 0; i--) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) __u64 w = crush_hash32_4(bucket->h.hash, x, bucket->h.items[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) r, bucket->h.id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) w &= 0xffff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) dprintk("list_choose i=%d x=%d r=%d item %d weight %x "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) "sw %x rand %llx",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) i, x, r, bucket->h.items[i], bucket->item_weights[i],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) bucket->sum_weights[i], w);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) w *= bucket->sum_weights[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) w = w >> 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) /*dprintk(" scaled %llx\n", w);*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) if (w < bucket->item_weights[i]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) return bucket->h.items[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) dprintk("bad list sums for bucket %d\n", bucket->h.id);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) return bucket->h.items[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) /* (binary) tree */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) static int height(int n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) int h = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) while ((n & 1) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) h++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) n = n >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) return h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) static int left(int x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) int h = height(x);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) return x - (1 << (h-1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) static int right(int x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) int h = height(x);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) return x + (1 << (h-1));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) static int terminal(int x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) return x & 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) static int bucket_tree_choose(const struct crush_bucket_tree *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) int x, int r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) int n;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) __u32 w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) __u64 t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) /* start at root */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) n = bucket->num_nodes >> 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) while (!terminal(n)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) int l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) /* pick point in [0, w) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) w = bucket->node_weights[n];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) t = (__u64)crush_hash32_4(bucket->h.hash, x, n, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) bucket->h.id) * (__u64)w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) t = t >> 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) /* descend to the left or right? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) l = left(n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) if (t < bucket->node_weights[l])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) n = l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) n = right(n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) return bucket->h.items[n >> 1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) /* straw */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) static int bucket_straw_choose(const struct crush_bucket_straw *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) int x, int r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) __u32 i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) int high = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) __u64 high_draw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) __u64 draw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) for (i = 0; i < bucket->h.size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) draw = crush_hash32_3(bucket->h.hash, x, bucket->h.items[i], r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) draw &= 0xffff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) draw *= bucket->straws[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) if (i == 0 || draw > high_draw) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) high = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) high_draw = draw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) return bucket->h.items[high];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) /* compute 2^44*log2(input+1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) static __u64 crush_ln(unsigned int xin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) unsigned int x = xin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) int iexpon, index1, index2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) __u64 RH, LH, LL, xl64, result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) x++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) /* normalize input */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) iexpon = 15;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) * figure out number of bits we need to shift and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) * do it in one step instead of iteratively
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) if (!(x & 0x18000)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) int bits = __builtin_clz(x & 0x1FFFF) - 16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) x <<= bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) iexpon = 15 - bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) index1 = (x >> 8) << 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) /* RH ~ 2^56/index1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) RH = __RH_LH_tbl[index1 - 256];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) /* LH ~ 2^48 * log2(index1/256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) LH = __RH_LH_tbl[index1 + 1 - 256];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) /* RH*x ~ 2^48 * (2^15 + xf), xf<2^8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) xl64 = (__s64)x * RH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) xl64 >>= 48;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) result = iexpon;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) result <<= (12 + 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) index2 = xl64 & 0xff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) /* LL ~ 2^48*log2(1.0+index2/2^15) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) LL = __LL_tbl[index2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) LH = LH + LL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) LH >>= (48 - 12 - 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) result += LH;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) * straw2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) * for reference, see:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) * https://en.wikipedia.org/wiki/Exponential_distribution#Distribution_of_the_minimum_of_exponential_random_variables
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) const struct crush_choose_arg *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) int position)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) if (!arg || !arg->weight_set)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) return bucket->item_weights;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) if (position >= arg->weight_set_size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) position = arg->weight_set_size - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) return arg->weight_set[position].weights;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) static __s32 *get_choose_arg_ids(const struct crush_bucket_straw2 *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) const struct crush_choose_arg *arg)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) if (!arg || !arg->ids)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) return bucket->h.items;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) return arg->ids;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) static int bucket_straw2_choose(const struct crush_bucket_straw2 *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) int x, int r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) const struct crush_choose_arg *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) int position)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) unsigned int i, high = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) unsigned int u;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) __s64 ln, draw, high_draw = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) __u32 *weights = get_choose_arg_weights(bucket, arg, position);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) __s32 *ids = get_choose_arg_ids(bucket, arg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) for (i = 0; i < bucket->h.size; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) dprintk("weight 0x%x item %d\n", weights[i], ids[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) if (weights[i]) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) u = crush_hash32_3(bucket->h.hash, x, ids[i], r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) u &= 0xffff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) * for some reason slightly less than 0x10000 produces
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) * a slightly more accurate distribution... probably a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) * rounding effect.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) * the natural log lookup table maps [0,0xffff]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) * (corresponding to real numbers [1/0x10000, 1] to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) * [0, 0xffffffffffff] (corresponding to real numbers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) * [-11.090355,0]).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) ln = crush_ln(u) - 0x1000000000000ll;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) * divide by 16.16 fixed-point weight. note
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) * that the ln value is negative, so a larger
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) * weight means a larger (less negative) value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) * for draw.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) draw = div64_s64(ln, weights[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) draw = S64_MIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) if (i == 0 || draw > high_draw) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) high = i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) high_draw = draw;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) return bucket->h.items[high];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) static int crush_bucket_choose(const struct crush_bucket *in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) struct crush_work_bucket *work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) int x, int r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) const struct crush_choose_arg *arg,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) int position)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) dprintk(" crush_bucket_choose %d x=%d r=%d\n", in->id, x, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) BUG_ON(in->size == 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) switch (in->alg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) case CRUSH_BUCKET_UNIFORM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) return bucket_uniform_choose(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) (const struct crush_bucket_uniform *)in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) work, x, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) case CRUSH_BUCKET_LIST:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) return bucket_list_choose((const struct crush_bucket_list *)in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) x, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) case CRUSH_BUCKET_TREE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) return bucket_tree_choose((const struct crush_bucket_tree *)in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) x, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) case CRUSH_BUCKET_STRAW:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) return bucket_straw_choose(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) (const struct crush_bucket_straw *)in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) x, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) case CRUSH_BUCKET_STRAW2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) return bucket_straw2_choose(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) (const struct crush_bucket_straw2 *)in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) x, r, arg, position);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) dprintk("unknown bucket %d alg %d\n", in->id, in->alg);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) return in->items[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) * true if device is marked "out" (failed, fully offloaded)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) * of the cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) static int is_out(const struct crush_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) const __u32 *weight, int weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) int item, int x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) if (item >= weight_max)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) if (weight[item] >= 0x10000)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) if (weight[item] == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) if ((crush_hash32_2(CRUSH_HASH_RJENKINS1, x, item) & 0xffff)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) < weight[item])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) * crush_choose_firstn - choose numrep distinct items of given type
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) * @map: the crush_map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) * @bucket: the bucket we are choose an item from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) * @x: crush input value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) * @numrep: the number of items to choose
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) * @type: the type of item to choose
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) * @out: pointer to output vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) * @outpos: our position in that vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) * @out_size: size of the out vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) * @tries: number of attempts to make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) * @recurse_tries: number of attempts to have recursive chooseleaf make
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) * @local_retries: localized retries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) * @local_fallback_retries: localized fallback retries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) * @recurse_to_leaf: true if we want one device under each item of given type (chooseleaf instead of choose)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) * @stable: stable mode starts rep=0 in the recursive call for all replicas
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) * @vary_r: pass r to recursive calls
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) * @out2: second output vector for leaf items (if @recurse_to_leaf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) * @parent_r: r value passed from the parent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) static int crush_choose_firstn(const struct crush_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) struct crush_work *work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) const struct crush_bucket *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) const __u32 *weight, int weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) int x, int numrep, int type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) int *out, int outpos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) int out_size,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) unsigned int tries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) unsigned int recurse_tries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) unsigned int local_retries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) unsigned int local_fallback_retries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) int recurse_to_leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) unsigned int vary_r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) unsigned int stable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) int *out2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) int parent_r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) const struct crush_choose_arg *choose_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) int rep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) unsigned int ftotal, flocal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) int retry_descent, retry_bucket, skip_rep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) const struct crush_bucket *in = bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) int item = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) int itemtype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) int collide, reject;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) int count = out_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d tries %d recurse_tries %d local_retries %d local_fallback_retries %d parent_r %d stable %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) recurse_to_leaf ? "_LEAF" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) bucket->id, x, outpos, numrep,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) tries, recurse_tries, local_retries, local_fallback_retries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) parent_r, stable);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) for (rep = stable ? 0 : outpos; rep < numrep && count > 0 ; rep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) /* keep trying until we get a non-out, non-colliding item */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) ftotal = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) skip_rep = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) retry_descent = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) in = bucket; /* initial bucket */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) /* choose through intervening buckets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) flocal = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) collide = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) retry_bucket = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) r = rep + parent_r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) /* r' = r + f_total */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) r += ftotal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) /* bucket choose */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) if (in->size == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) reject = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) goto reject;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) if (local_fallback_retries > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) flocal >= (in->size>>1) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) flocal > local_fallback_retries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) item = bucket_perm_choose(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) in, work->work[-1-in->id],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) x, r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) item = crush_bucket_choose(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) in, work->work[-1-in->id],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) x, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) (choose_args ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) &choose_args[-1-in->id] : NULL),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) outpos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) if (item >= map->max_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) dprintk(" bad item %d\n", item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) skip_rep = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) /* desired type? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) if (item < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) itemtype = map->buckets[-1-item]->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) itemtype = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) dprintk(" item %d type %d\n", item, itemtype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) /* keep going? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) if (itemtype != type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) if (item >= 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) (-1-item) >= map->max_buckets) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) dprintk(" bad item type %d\n", type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) skip_rep = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) in = map->buckets[-1-item];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) retry_bucket = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) /* collision? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) for (i = 0; i < outpos; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) if (out[i] == item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) collide = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) reject = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) if (!collide && recurse_to_leaf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) if (item < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) int sub_r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) if (vary_r)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) sub_r = r >> (vary_r-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) sub_r = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) if (crush_choose_firstn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) map->buckets[-1-item],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) weight, weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) x, stable ? 1 : outpos+1, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) out2, outpos, count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) recurse_tries, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) local_retries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) local_fallback_retries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) vary_r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) stable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) sub_r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) choose_args) <= outpos)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) /* didn't get leaf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) reject = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) /* we already have a leaf! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) out2[outpos] = item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) if (!reject && !collide) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) /* out? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) if (itemtype == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) reject = is_out(map, weight,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) item, x);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) reject:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) if (reject || collide) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) ftotal++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) flocal++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) if (collide && flocal <= local_retries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) /* retry locally a few times */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) retry_bucket = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) else if (local_fallback_retries > 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) flocal <= in->size + local_fallback_retries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) /* exhaustive bucket search */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) retry_bucket = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) else if (ftotal < tries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) /* then retry descent */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) retry_descent = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) /* else give up */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) skip_rep = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) dprintk(" reject %d collide %d "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) "ftotal %u flocal %u\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) reject, collide, ftotal,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) flocal);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) } while (retry_bucket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) } while (retry_descent);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) if (skip_rep) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) dprintk("skip rep\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) dprintk("CHOOSE got %d\n", item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) out[outpos] = item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) outpos++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) #ifndef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) if (map->choose_tries && ftotal <= map->choose_total_tries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) map->choose_tries[ftotal]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) dprintk("CHOOSE returns %d\n", outpos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) return outpos;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) * crush_choose_indep: alternative breadth-first positionally stable mapping
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) static void crush_choose_indep(const struct crush_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) struct crush_work *work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) const struct crush_bucket *bucket,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) const __u32 *weight, int weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) int x, int left, int numrep, int type,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) int *out, int outpos,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) unsigned int tries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) unsigned int recurse_tries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) int recurse_to_leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) int *out2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) int parent_r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) const struct crush_choose_arg *choose_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) const struct crush_bucket *in = bucket;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) int endpos = outpos + left;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) int rep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) unsigned int ftotal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) int r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) int item = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) int itemtype;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) int collide;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) dprintk("CHOOSE%s INDEP bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) bucket->id, x, outpos, numrep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) /* initially my result is undefined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) for (rep = outpos; rep < endpos; rep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) out[rep] = CRUSH_ITEM_UNDEF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) if (out2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) out2[rep] = CRUSH_ITEM_UNDEF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) for (ftotal = 0; left > 0 && ftotal < tries; ftotal++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) #ifdef DEBUG_INDEP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) if (out2 && ftotal) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) dprintk("%u %d a: ", ftotal, left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) for (rep = outpos; rep < endpos; rep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) dprintk(" %d", out[rep]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) dprintk("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) dprintk("%u %d b: ", ftotal, left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) for (rep = outpos; rep < endpos; rep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) dprintk(" %d", out2[rep]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) dprintk("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) for (rep = outpos; rep < endpos; rep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) if (out[rep] != CRUSH_ITEM_UNDEF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) in = bucket; /* initial bucket */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) /* choose through intervening buckets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) /* note: we base the choice on the position
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) * even in the nested call. that means that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) * if the first layer chooses the same bucket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) * in a different position, we will tend to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) * choose a different item in that bucket.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) * this will involve more devices in data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) * movement and tend to distribute the load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) r = rep + parent_r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) /* be careful */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) if (in->alg == CRUSH_BUCKET_UNIFORM &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) in->size % numrep == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) /* r'=r+(n+1)*f_total */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) r += (numrep+1) * ftotal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) /* r' = r + n*f_total */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) r += numrep * ftotal;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) /* bucket choose */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) if (in->size == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) dprintk(" empty bucket\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) item = crush_bucket_choose(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) in, work->work[-1-in->id],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) x, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) (choose_args ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) &choose_args[-1-in->id] : NULL),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) outpos);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) if (item >= map->max_devices) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) dprintk(" bad item %d\n", item);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) out[rep] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) if (out2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) out2[rep] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) left--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) /* desired type? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) if (item < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) itemtype = map->buckets[-1-item]->type;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) itemtype = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) dprintk(" item %d type %d\n", item, itemtype);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) /* keep going? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) if (itemtype != type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) if (item >= 0 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) (-1-item) >= map->max_buckets) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) dprintk(" bad item type %d\n", type);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) out[rep] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) if (out2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) out2[rep] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) left--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) in = map->buckets[-1-item];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) /* collision? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) collide = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) for (i = outpos; i < endpos; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) if (out[i] == item) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) collide = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) if (collide)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) if (recurse_to_leaf) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) if (item < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) crush_choose_indep(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) work,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) map->buckets[-1-item],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) weight, weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) x, 1, numrep, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) out2, rep,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) recurse_tries, 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) 0, NULL, r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) choose_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) if (out2[rep] == CRUSH_ITEM_NONE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) /* placed nothing; no leaf */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) /* we already have a leaf! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) out2[rep] = item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) /* out? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) if (itemtype == 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) is_out(map, weight, weight_max, item, x))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) /* yay! */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) out[rep] = item;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) left--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) for (rep = outpos; rep < endpos; rep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) if (out[rep] == CRUSH_ITEM_UNDEF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) out[rep] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) if (out2 && out2[rep] == CRUSH_ITEM_UNDEF) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) out2[rep] = CRUSH_ITEM_NONE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) #ifndef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) if (map->choose_tries && ftotal <= map->choose_total_tries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) map->choose_tries[ftotal]++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) #ifdef DEBUG_INDEP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) if (out2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) dprintk("%u %d a: ", ftotal, left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) for (rep = outpos; rep < endpos; rep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) dprintk(" %d", out[rep]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) dprintk("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) dprintk("%u %d b: ", ftotal, left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) for (rep = outpos; rep < endpos; rep++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) dprintk(" %d", out2[rep]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) dprintk("\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) * This takes a chunk of memory and sets it up to be a shiny new
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) * working area for a CRUSH placement computation. It must be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) * on any newly allocated memory before passing it in to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) * crush_do_rule. It may be used repeatedly after that, so long as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) * map has not changed. If the map /has/ changed, you must make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) * the working size is no smaller than what was allocated and re-run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) * crush_init_workspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) * If you do retain the working space between calls to crush, make it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) * thread-local.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) void crush_init_workspace(const struct crush_map *map, void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) struct crush_work *w = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) __s32 b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) * We work by moving through the available space and setting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) * values and pointers as we go.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) * It's a bit like Forth's use of the 'allot' word since we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) * set the pointer first and then reserve the space for it to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) * point to by incrementing the point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) v += sizeof(struct crush_work);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) w->work = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) v += map->max_buckets * sizeof(struct crush_work_bucket *);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) for (b = 0; b < map->max_buckets; ++b) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) if (!map->buckets[b])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) w->work[b] = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) switch (map->buckets[b]->alg) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) v += sizeof(struct crush_work_bucket);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) w->work[b]->perm_x = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) w->work[b]->perm_n = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) w->work[b]->perm = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) v += map->buckets[b]->size * sizeof(__u32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) BUG_ON(v - (void *)w != map->working_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * crush_do_rule - calculate a mapping with the given input and rule
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) * @map: the crush_map
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) * @ruleno: the rule id
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) * @x: hash input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) * @result: pointer to result vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) * @result_max: maximum result size
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) * @weight: weight vector (for map leaves)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) * @weight_max: size of weight vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) * @cwin: pointer to at least crush_work_size() bytes of memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) * @choose_args: weights and ids for each known bucket
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) int crush_do_rule(const struct crush_map *map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) int ruleno, int x, int *result, int result_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) const __u32 *weight, int weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) void *cwin, const struct crush_choose_arg *choose_args)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) int result_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) struct crush_work *cw = cwin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) int *a = cwin + map->working_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) int *b = a + result_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) int *c = b + result_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) int *w = a;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) int *o = b;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) int recurse_to_leaf;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) int wsize = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) int osize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) int *tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) const struct crush_rule *rule;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) __u32 step;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) int numrep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) int out_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) * the original choose_total_tries value was off by one (it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) * counted "retries" and not "tries"). add one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) int choose_tries = map->choose_total_tries + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) int choose_leaf_tries = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) * the local tries values were counted as "retries", though,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) * and need no adjustment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) int choose_local_retries = map->choose_local_tries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) int choose_local_fallback_retries = map->choose_local_fallback_tries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) int vary_r = map->chooseleaf_vary_r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) int stable = map->chooseleaf_stable;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) if ((__u32)ruleno >= map->max_rules) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) dprintk(" bad ruleno %d\n", ruleno);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) rule = map->rules[ruleno];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) result_len = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) for (step = 0; step < rule->len; step++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) int firstn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) const struct crush_rule_step *curstep = &rule->steps[step];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) switch (curstep->op) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) case CRUSH_RULE_TAKE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) if ((curstep->arg1 >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) curstep->arg1 < map->max_devices) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) (-1-curstep->arg1 >= 0 &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) -1-curstep->arg1 < map->max_buckets &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) map->buckets[-1-curstep->arg1])) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) w[0] = curstep->arg1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) wsize = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) dprintk(" bad take value %d\n", curstep->arg1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) case CRUSH_RULE_SET_CHOOSE_TRIES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) if (curstep->arg1 > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) choose_tries = curstep->arg1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) case CRUSH_RULE_SET_CHOOSELEAF_TRIES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) if (curstep->arg1 > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) choose_leaf_tries = curstep->arg1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) case CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) if (curstep->arg1 >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) choose_local_retries = curstep->arg1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) case CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) if (curstep->arg1 >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) choose_local_fallback_retries = curstep->arg1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) case CRUSH_RULE_SET_CHOOSELEAF_VARY_R:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) if (curstep->arg1 >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) vary_r = curstep->arg1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) case CRUSH_RULE_SET_CHOOSELEAF_STABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) if (curstep->arg1 >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) stable = curstep->arg1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) case CRUSH_RULE_CHOOSELEAF_FIRSTN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) case CRUSH_RULE_CHOOSE_FIRSTN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) firstn = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) case CRUSH_RULE_CHOOSELEAF_INDEP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) case CRUSH_RULE_CHOOSE_INDEP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) if (wsize == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) recurse_to_leaf =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) curstep->op ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) CRUSH_RULE_CHOOSELEAF_FIRSTN ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) curstep->op ==
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) CRUSH_RULE_CHOOSELEAF_INDEP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) /* reset output */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) osize = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) for (i = 0; i < wsize; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) int bno;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) numrep = curstep->arg1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) if (numrep <= 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) numrep += result_max;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) if (numrep <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) j = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) /* make sure bucket id is valid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) bno = -1 - w[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) if (bno < 0 || bno >= map->max_buckets) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) /* w[i] is probably CRUSH_ITEM_NONE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) dprintk(" bad w[i] %d\n", w[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) if (firstn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) int recurse_tries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) if (choose_leaf_tries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) recurse_tries =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) choose_leaf_tries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) else if (map->chooseleaf_descend_once)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) recurse_tries = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) recurse_tries = choose_tries;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) osize += crush_choose_firstn(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) cw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) map->buckets[bno],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) weight, weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) x, numrep,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) curstep->arg2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) o+osize, j,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) result_max-osize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) choose_tries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) recurse_tries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) choose_local_retries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) choose_local_fallback_retries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) recurse_to_leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) vary_r,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) stable,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) c+osize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) choose_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) out_size = ((numrep < (result_max-osize)) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) numrep : (result_max-osize));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) crush_choose_indep(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) map,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) cw,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) map->buckets[bno],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) weight, weight_max,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) x, out_size, numrep,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) curstep->arg2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) o+osize, j,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) choose_tries,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) choose_leaf_tries ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) choose_leaf_tries : 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) recurse_to_leaf,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) c+osize,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) choose_args);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) osize += out_size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) if (recurse_to_leaf)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) /* copy final _leaf_ values to output set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) memcpy(o, c, osize*sizeof(*o));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) /* swap o and w arrays */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) tmp = o;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) o = w;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) w = tmp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) wsize = osize;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) case CRUSH_RULE_EMIT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) for (i = 0; i < wsize && result_len < result_max; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) result[result_len] = w[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) result_len++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) wsize = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) dprintk(" unknown op %d at step %d\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) curstep->op, step);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) return result_len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) }