^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Historical Service Time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Keeps a time-weighted exponential moving average of the historical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * service time. Estimates future service time based on the historical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * service time and the number of outstanding requests.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Marks paths stale if they have not finished within hst *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * num_paths. If a path is stale and unused, we will send a single
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * request to probe in case the path has improved. This situation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * generally arises if the path is so much worse than others that it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * will never have the best estimated service time, or if the entire
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * multipath device is unused. If a path is stale and in use, limit the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * number of requests it can receive with the assumption that the path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * has become degraded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * To avoid repeatedly calculating exponents for time weighting, times
 * are split into HST_WEIGHT_COUNT buckets each (1 << HST_BUCKET_SHIFT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * ns, and the weighting is pre-calculated.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #include "dm.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #include "dm-path-selector.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #include <linux/blkdev.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
#define DM_MSG_PREFIX "multipath historical-service-time"
#define HST_MIN_IO 1 /* Default repeat_count: reselect after every I/O */
#define HST_VERSION "0.1.1"

#define HST_FIXED_SHIFT 10  /* 10 bits of decimal precision */
#define HST_FIXED_MAX (ULLONG_MAX >> HST_FIXED_SHIFT)
#define HST_FIXED_1 (1 << HST_FIXED_SHIFT) /* 1.0 in fixed point (1024) */
#define HST_FIXED_95 972 /* ~0.95 in fixed point (972/1024) */
#define HST_MAX_INFLIGHT HST_FIXED_1 /* above this, hst_compare() rescales */
#define HST_BUCKET_SHIFT 24 /* Buckets are ~ 16ms */
#define HST_WEIGHT_COUNT 64ULL /* Number of pre-computed weight buckets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43)
/*
 * Per-path-selector state shared by all paths in the group.
 */
struct selector {
	struct list_head valid_paths;	/* struct path_info.list, usable paths */
	struct list_head failed_paths;	/* struct path_info.list, failed paths */
	int valid_count;		/* number of entries on valid_paths */
	spinlock_t lock;		/* protects both lists and valid_count */

	/* Pre-calculated EMA weight per elapsed-time bucket; see hst_set_weights() */
	unsigned int weights[HST_WEIGHT_COUNT];
	/* Paths within this latency ratio are treated as equivalent; see hst_create() */
	unsigned int threshold_multiplier;
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
/*
 * Per-path state. Timestamps are in nanoseconds.
 */
struct path_info {
	struct list_head list;		/* on selector valid_paths or failed_paths */
	struct dm_path *path;
	unsigned int repeat_count;	/* I/Os before switching; see hst_add_path() */

	spinlock_t lock;		/* protects the fields below */

	u64 historical_service_time; /* Fixed point */

	u64 stale_after;	/* timestamp after which this path counts as stale */
	u64 last_finish;	/* completion time of the most recent I/O */

	u64 outstanding;	/* number of in-flight requests (see hst_start_io()) */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * fixed_power - compute: x^n, in O(log n) time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * @x: base of the power
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * @frac_bits: fractional bits of @x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * @n: power to raise @x to.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) * By exploiting the relation between the definition of the natural power
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) * function: x^n := x*x*...*x (x multiplied by itself for n times), and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) * (where: n_i \elem {0, 1}, the binary vector representing n),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) * of course trivially computable in O(log_2 n), the length of our binary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) * vector.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) * (see: kernel/sched/loadavg.c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) static u64 fixed_power(u64 x, unsigned int frac_bits, unsigned int n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) unsigned long result = 1UL << frac_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) if (n) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) if (n & 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) result *= x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) result += 1UL << (frac_bits - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) result >>= frac_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) n >>= 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) if (!n)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) x *= x;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) x += 1UL << (frac_bits - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) x >>= frac_bits;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) return result;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * Calculate the next value of an exponential moving average
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * a_1 = a_0 * e + a * (1 - e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) * @last: [0, ULLONG_MAX >> HST_FIXED_SHIFT]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) * @next: [0, ULLONG_MAX >> HST_FIXED_SHIFT]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) * @weight: [0, HST_FIXED_1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * Note:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * To account for multiple periods in the same calculation,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) * a_n = a_0 * e^n + a * (1 - e^n),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * so call fixed_ema(last, next, pow(weight, N))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) static u64 fixed_ema(u64 last, u64 next, u64 weight)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) last *= weight;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) last += next * (HST_FIXED_1 - weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) last += 1ULL << (HST_FIXED_SHIFT - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) return last >> HST_FIXED_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) static struct selector *alloc_selector(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) if (s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) INIT_LIST_HEAD(&s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) INIT_LIST_HEAD(&s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) spin_lock_init(&s->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) s->valid_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * Get the weight for a given time span.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) static u64 hst_weight(struct path_selector *ps, u64 delta)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) int bucket = clamp(delta >> HST_BUCKET_SHIFT, 0ULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) HST_WEIGHT_COUNT - 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) return s->weights[bucket];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) * Set up the weights array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) * weights[len-1] = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) * weights[n] = base ^ (n + 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) static void hst_set_weights(struct path_selector *ps, unsigned int base)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) if (base >= HST_FIXED_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) for (i = 0; i < HST_WEIGHT_COUNT - 1; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) s->weights[i] = fixed_power(base, HST_FIXED_SHIFT, i + 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) s->weights[HST_WEIGHT_COUNT - 1] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) static int hst_create(struct path_selector *ps, unsigned int argc, char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) struct selector *s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) unsigned int base_weight = HST_FIXED_95;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) unsigned int threshold_multiplier = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) char dummy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) * Arguments: [<base_weight> [<threshold_multiplier>]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) * <base_weight>: Base weight for ema [0, 1024) 10-bit fixed point. A
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) * value of 0 will completely ignore any history.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) * If not given, default (HST_FIXED_95) is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * <threshold_multiplier>: Minimum threshold multiplier for paths to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) * be considered different. That is, a path is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) * considered different iff (p1 > N * p2) where p1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) * is the path with higher service time. A threshold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) * of 1 or 0 has no effect. Defaults to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) if (argc > 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) if (argc && (sscanf(argv[0], "%u%c", &base_weight, &dummy) != 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) base_weight >= HST_FIXED_1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) if (argc > 1 && (sscanf(argv[1], "%u%c",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) &threshold_multiplier, &dummy) != 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) s = alloc_selector();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) ps->context = s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) hst_set_weights(ps, base_weight);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) s->threshold_multiplier = threshold_multiplier;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) static void free_paths(struct list_head *paths)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) struct path_info *pi, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) list_for_each_entry_safe(pi, next, paths, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) list_del(&pi->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) kfree(pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) static void hst_destroy(struct path_selector *ps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) free_paths(&s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) free_paths(&s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) ps->context = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) static int hst_status(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) status_type_t type, char *result, unsigned int maxlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) unsigned int sz = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) struct path_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) if (!path) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) DMEMIT("2 %u %u ", s->weights[0], s->threshold_multiplier);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) case STATUSTYPE_INFO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) DMEMIT("%llu %llu %llu ", pi->historical_service_time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) pi->outstanding, pi->stale_after);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) case STATUSTYPE_TABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) DMEMIT("0 ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) return sz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) static int hst_add_path(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) int argc, char **argv, char **error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) struct path_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) unsigned int repeat_count = HST_MIN_IO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) char dummy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) * Arguments: [<repeat_count>]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) * <repeat_count>: The number of I/Os before switching path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) * If not given, default (HST_MIN_IO) is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) if (argc > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) *error = "historical-service-time ps: incorrect number of arguments";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) *error = "historical-service-time ps: invalid repeat count";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) /* allocate the path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) pi = kmalloc(sizeof(*pi), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) if (!pi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) *error = "historical-service-time ps: Error allocating path context";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) pi->path = path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) pi->repeat_count = repeat_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) pi->historical_service_time = HST_FIXED_1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) spin_lock_init(&pi->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) pi->outstanding = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) pi->stale_after = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) pi->last_finish = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) path->pscontext = pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) list_add_tail(&pi->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) s->valid_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) static void hst_fail_path(struct path_selector *ps, struct dm_path *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) list_move(&pi->list, &s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) s->valid_count--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) static int hst_reinstate_path(struct path_selector *ps, struct dm_path *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) list_move_tail(&pi->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) s->valid_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341)
/*
 * Snapshot the fields of @pi needed by hst_compare() under the path lock,
 * so that the three values are mutually consistent.
 */
static void hst_fill_compare(struct path_info *pi, u64 *hst,
			     u64 *out, u64 *stale)
{
	unsigned long flags;

	spin_lock_irqsave(&pi->lock, flags);
	*hst = pi->historical_service_time;
	*out = pi->outstanding;
	*stale = pi->stale_after;
	spin_unlock_irqrestore(&pi->lock, flags);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) * Compare the estimated service time of 2 paths, pi1 and pi2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) * for the incoming I/O.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) * < 0 : pi1 is better
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) * 0 : no difference between pi1 and pi2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) * > 0 : pi2 is better
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) static long long hst_compare(struct path_info *pi1, struct path_info *pi2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) u64 time_now, struct path_selector *ps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) u64 hst1, hst2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) long long out1, out2, stale1, stale2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) int pi2_better, over_threshold;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) hst_fill_compare(pi1, &hst1, &out1, &stale1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) hst_fill_compare(pi2, &hst2, &out2, &stale2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) /* Check here if estimated latency for two paths are too similar.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) * If this is the case, we skip extra calculation and just compare
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) * outstanding requests. In this case, any unloaded paths will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) * be preferred.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) if (hst1 > hst2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) over_threshold = hst1 > (s->threshold_multiplier * hst2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) over_threshold = hst2 > (s->threshold_multiplier * hst1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) if (!over_threshold)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) return out1 - out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) * If an unloaded path is stale, choose it. If both paths are unloaded,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) * choose path that is the most stale.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) * (If one path is loaded, choose the other)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) if ((!out1 && stale1 < time_now) || (!out2 && stale2 < time_now) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) (!out1 && !out2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) return (!out2 * stale1) - (!out1 * stale2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) /* Compare estimated service time. If outstanding is the same, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) * don't need to multiply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) if (out1 == out2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) pi2_better = hst1 > hst2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) /* Potential overflow with out >= 1024 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) if (unlikely(out1 >= HST_MAX_INFLIGHT ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) out2 >= HST_MAX_INFLIGHT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) /* If over 1023 in-flights, we may overflow if hst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) * is at max. (With this shift we still overflow at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) * 1048576 in-flights, which is high enough).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) hst1 >>= HST_FIXED_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) hst2 >>= HST_FIXED_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) pi2_better = (1 + out1) * hst1 > (1 + out2) * hst2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) /* In the case that the 'winner' is stale, limit to equal usage. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) if (pi2_better) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) if (stale2 < time_now)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) return out1 - out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) if (stale1 < time_now)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) return out1 - out2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) return -1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) static struct dm_path *hst_select_path(struct path_selector *ps,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) size_t nr_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) struct path_info *pi = NULL, *best = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) u64 time_now = sched_clock();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) struct dm_path *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) if (list_empty(&s->valid_paths))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) list_for_each_entry(pi, &s->valid_paths, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) if (!best || (hst_compare(pi, best, time_now, ps) < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) best = pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) if (!best)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) /* Move last used path to end (least preferred in case of ties) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) list_move_tail(&best->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) ret = best->path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) static int hst_start_io(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) size_t nr_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) spin_lock_irqsave(&pi->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) pi->outstanding++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) spin_unlock_irqrestore(&pi->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) static u64 path_service_time(struct path_info *pi, u64 start_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) u64 sched_now = ktime_get_ns();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) /* if a previous disk request has finished after this IO was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) * sent to the hardware, pretend the submission happened
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) * serially.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) if (time_after64(pi->last_finish, start_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) start_time = pi->last_finish;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) pi->last_finish = sched_now;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) if (time_before64(sched_now, start_time))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) return sched_now - start_time;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) static int hst_end_io(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) size_t nr_bytes, u64 start_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) u64 st;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) spin_lock_irqsave(&pi->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) st = path_service_time(pi, start_time);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) pi->outstanding--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) pi->historical_service_time =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) fixed_ema(pi->historical_service_time,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) min(st * HST_FIXED_1, HST_FIXED_MAX),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) hst_weight(ps, st));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) * On request end, mark path as fresh. If a path hasn't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) * finished any requests within the fresh period, the estimated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) * service time is considered too optimistic and we limit the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * maximum requests on that path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) pi->stale_after = pi->last_finish +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) (s->valid_count * (pi->historical_service_time >> HST_FIXED_SHIFT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) spin_unlock_irqrestore(&pi->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) static struct path_selector_type hst_ps = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) .name = "historical-service-time",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) .module = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) .table_args = 1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) .info_args = 3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) .create = hst_create,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) .destroy = hst_destroy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) .status = hst_status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) .add_path = hst_add_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) .fail_path = hst_fail_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) .reinstate_path = hst_reinstate_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) .select_path = hst_select_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) .start_io = hst_start_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) .end_io = hst_end_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) static int __init dm_hst_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) int r = dm_register_path_selector(&hst_ps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) DMERR("register failed %d", r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) DMINFO("version " HST_VERSION " loaded");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) static void __exit dm_hst_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) int r = dm_unregister_path_selector(&hst_ps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) DMERR("unregister failed %d", r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) module_init(dm_hst_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) module_exit(dm_hst_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) MODULE_DESCRIPTION(DM_NAME " measured service time oriented path selector");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) MODULE_AUTHOR("Khazhismel Kumykov <khazhy@google.com>");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) MODULE_LICENSE("GPL");