Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

dm-historical-service-time.c (git blame: all lines from commit 8f3ce5b39, kx, 2023-10-28 12:00:06 +0300)
// SPDX-License-Identifier: GPL-2.0
/*
 * Historical Service Time
 *
 *  Keeps a time-weighted exponential moving average of the historical
 *  service time. Estimates future service time based on the historical
 *  service time and the number of outstanding requests.
 *
 *  Marks paths stale if they have not finished within hst *
 *  num_paths. If a path is stale and unused, we will send a single
 *  request to probe in case the path has improved. This situation
 *  generally arises if the path is so much worse than others that it
 *  will never have the best estimated service time, or if the entire
 *  multipath device is unused. If a path is stale and in use, limit the
 *  number of requests it can receive with the assumption that the path
 *  has become degraded.
 *
 *  To avoid repeatedly calculating exponents for time weighting, times
 *  are split into HST_WEIGHT_COUNT buckets each (1 << HST_BUCKET_SHIFT)
 *  ns, and the weighting is pre-calculated.
 *
 */

#include "dm.h"
#include "dm-path-selector.h"

#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/module.h>


#define DM_MSG_PREFIX	"multipath historical-service-time"
#define HST_MIN_IO 1
#define HST_VERSION "0.1.1"

#define HST_FIXED_SHIFT 10  /* 10 bits of decimal precision */
#define HST_FIXED_MAX (ULLONG_MAX >> HST_FIXED_SHIFT)
#define HST_FIXED_1 (1 << HST_FIXED_SHIFT)
#define HST_FIXED_95 972
#define HST_MAX_INFLIGHT HST_FIXED_1
#define HST_BUCKET_SHIFT 24 /* Buckets are ~ 16ms */
#define HST_WEIGHT_COUNT 64ULL
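
/*
 * For reference (illustrative arithmetic derived from the constants above):
 *   HST_FIXED_1  = 1 << 10 = 1024, i.e. 1.0 in this fixed-point encoding;
 *   HST_FIXED_95 = 972 ~= 972/1024 ~= 0.95, the default EMA base weight;
 *   one weight bucket spans 1 << 24 ns ~= 16.8 ms, so the 64 buckets
 *   cover roughly 64 * 16.8 ms ~= 1.07 s of elapsed service time.
 */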

struct selector {
	struct list_head valid_paths;
	struct list_head failed_paths;
	int valid_count;
	spinlock_t lock;

	unsigned int weights[HST_WEIGHT_COUNT];
	unsigned int threshold_multiplier;
};

struct path_info {
	struct list_head list;
	struct dm_path *path;
	unsigned int repeat_count;

	spinlock_t lock;

	u64 historical_service_time; /* Fixed point */

	u64 stale_after;
	u64 last_finish;

	u64 outstanding;
};

/**
 * fixed_power - compute: x^n, in O(log n) time
 *
 * @x:         base of the power
 * @frac_bits: fractional bits of @x
 * @n:         power to raise @x to.
 *
 * By exploiting the relation between the definition of the natural power
 * function: x^n := x*x*...*x (x multiplied by itself for n times), and
 * the binary encoding of numbers used by computers: n := \Sum n_i * 2^i,
 * (where: n_i \elem {0, 1}, the binary vector representing n),
 * we find: x^n := x^(\Sum n_i * 2^i) := \Prod x^(n_i * 2^i), which is
 * of course trivially computable in O(log_2 n), the length of our binary
 * vector.
 *
 * (see: kernel/sched/loadavg.c)
 */
static u64 fixed_power(u64 x, unsigned int frac_bits, unsigned int n)
{
	unsigned long result = 1UL << frac_bits;

	if (n) {
		for (;;) {
			if (n & 1) {
				result *= x;
				result += 1UL << (frac_bits - 1);
				result >>= frac_bits;
			}
			n >>= 1;
			if (!n)
				break;
			x *= x;
			x += 1UL << (frac_bits - 1);
			x >>= frac_bits;
		}
	}

	return result;
}
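
/*
 * Worked example of fixed_power() (illustrative values): x = 972 (~0.95 with
 * frac_bits = 10) and n = 3.  The loop walks the bits of n = 0b11:
 *   bit 0 set: result = (1024 * 972 + 512) >> 10 = 972
 *   square:    x      = (972 * 972 + 512)  >> 10 = 923   (~0.95^2 ~= 0.90)
 *   bit 1 set: result = (972 * 923 + 512)  >> 10 = 876   (~0.95^3 ~= 0.86)
 * For large n the number of multiplies grows with log2(n) rather than n.
 */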

/*
 * Calculate the next value of an exponential moving average
 * a_1 = a_0 * e + a * (1 - e)
 *
 * @last: [0, ULLONG_MAX >> HST_FIXED_SHIFT]
 * @next: [0, ULLONG_MAX >> HST_FIXED_SHIFT]
 * @weight: [0, HST_FIXED_1]
 *
 * Note:
 *   To account for multiple periods in the same calculation,
 *   a_n = a_0 * e^n + a * (1 - e^n),
 *   so call fixed_ema(last, next, pow(weight, N))
 */
static u64 fixed_ema(u64 last, u64 next, u64 weight)
{
	last *= weight;
	last += next * (HST_FIXED_1 - weight);
	last += 1ULL << (HST_FIXED_SHIFT - 1);
	return last >> HST_FIXED_SHIFT;
}
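
/*
 * Worked example of fixed_ema() (illustrative values): with the default
 * weight HST_FIXED_95 = 972, last = 1000 and next = 2000:
 *   (1000 * 972 + 2000 * (1024 - 972) + 512) >> 10
 *     = (972000 + 104000 + 512) >> 10 = 1051,
 * i.e. the average moves about 5% of the way towards the new sample,
 * matching 1 - 972/1024 ~= 0.051.
 */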

static struct selector *alloc_selector(void)
{
	struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);

	if (s) {
		INIT_LIST_HEAD(&s->valid_paths);
		INIT_LIST_HEAD(&s->failed_paths);
		spin_lock_init(&s->lock);
		s->valid_count = 0;
	}

	return s;
}

/*
 * Get the weight for a given time span.
 */
static u64 hst_weight(struct path_selector *ps, u64 delta)
{
	struct selector *s = ps->context;
	int bucket = clamp(delta >> HST_BUCKET_SHIFT, 0ULL,
			   HST_WEIGHT_COUNT - 1);

	return s->weights[bucket];
}

/*
 * Set up the weights array.
 *
 * weights[len-1] = 0
 * weights[n] = base ^ (n + 1)
 */
static void hst_set_weights(struct path_selector *ps, unsigned int base)
{
	struct selector *s = ps->context;
	int i;

	if (base >= HST_FIXED_1)
		return;

	for (i = 0; i < HST_WEIGHT_COUNT - 1; i++)
		s->weights[i] = fixed_power(base, HST_FIXED_SHIFT, i + 1);
	s->weights[HST_WEIGHT_COUNT - 1] = 0;
}
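
/*
 * With the default base of 972 the pre-computed table decays roughly as
 * 0.95^(n + 1): weights[0] ~= 0.95, weights[1] ~= 0.90, ..., weights[62]
 * ~= 0.04, and weights[63] = 0.  Since the bucket index is the service
 * time divided by ~16.8 ms, a single request that takes longer than about
 * a second is weighted so heavily that it effectively replaces the
 * accumulated history.
 */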

static int hst_create(struct path_selector *ps, unsigned int argc, char **argv)
{
	struct selector *s;
	unsigned int base_weight = HST_FIXED_95;
	unsigned int threshold_multiplier = 0;
	char dummy;

	/*
	 * Arguments: [<base_weight> [<threshold_multiplier>]]
	 *   <base_weight>: Base weight for ema [0, 1024) 10-bit fixed point. A
	 *                  value of 0 will completely ignore any history.
	 *                  If not given, default (HST_FIXED_95) is used.
	 *   <threshold_multiplier>: Minimum threshold multiplier for paths to
	 *                  be considered different. That is, a path is
	 *                  considered different iff (p1 > N * p2) where p1
	 *                  is the path with higher service time. A threshold
	 *                  of 1 or 0 has no effect. Defaults to 0.
	 */
	if (argc > 2)
		return -EINVAL;

	if (argc && (sscanf(argv[0], "%u%c", &base_weight, &dummy) != 1 ||
	     base_weight >= HST_FIXED_1)) {
		return -EINVAL;
	}

	if (argc > 1 && (sscanf(argv[1], "%u%c",
				&threshold_multiplier, &dummy) != 1)) {
		return -EINVAL;
	}

	s = alloc_selector();
	if (!s)
		return -ENOMEM;

	ps->context = s;

	hst_set_weights(ps, base_weight);
	s->threshold_multiplier = threshold_multiplier;
	return 0;
}
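
/*
 * Example (hypothetical argument values): creating the selector with
 * arguments "921 2" sets a lighter base weight of 921/1024 ~= 0.90, so
 * history decays faster, and a threshold_multiplier of 2, so two paths'
 * estimates are only treated as different when one exceeds twice the
 * other; otherwise hst_compare() below simply compares their outstanding
 * request counts.
 */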

static void free_paths(struct list_head *paths)
{
	struct path_info *pi, *next;

	list_for_each_entry_safe(pi, next, paths, list) {
		list_del(&pi->list);
		kfree(pi);
	}
}

static void hst_destroy(struct path_selector *ps)
{
	struct selector *s = ps->context;

	free_paths(&s->valid_paths);
	free_paths(&s->failed_paths);
	kfree(s);
	ps->context = NULL;
}

static int hst_status(struct path_selector *ps, struct dm_path *path,
		     status_type_t type, char *result, unsigned int maxlen)
{
	unsigned int sz = 0;
	struct path_info *pi;

	if (!path) {
		struct selector *s = ps->context;

		DMEMIT("2 %u %u ", s->weights[0], s->threshold_multiplier);
	} else {
		pi = path->pscontext;

		switch (type) {
		case STATUSTYPE_INFO:
			DMEMIT("%llu %llu %llu ", pi->historical_service_time,
			       pi->outstanding, pi->stale_after);
			break;
		case STATUSTYPE_TABLE:
			DMEMIT("0 ");
			break;
		}
	}

	return sz;
}

static int hst_add_path(struct path_selector *ps, struct dm_path *path,
		       int argc, char **argv, char **error)
{
	struct selector *s = ps->context;
	struct path_info *pi;
	unsigned int repeat_count = HST_MIN_IO;
	char dummy;
	unsigned long flags;

	/*
	 * Arguments: [<repeat_count>]
	 *   <repeat_count>: The number of I/Os before switching path.
	 *                   If not given, default (HST_MIN_IO) is used.
	 */
	if (argc > 1) {
		*error = "historical-service-time ps: incorrect number of arguments";
		return -EINVAL;
	}

	if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
		*error = "historical-service-time ps: invalid repeat count";
		return -EINVAL;
	}

	/* allocate the path */
	pi = kmalloc(sizeof(*pi), GFP_KERNEL);
	if (!pi) {
		*error = "historical-service-time ps: Error allocating path context";
		return -ENOMEM;
	}

	pi->path = path;
	pi->repeat_count = repeat_count;

	pi->historical_service_time = HST_FIXED_1;

	spin_lock_init(&pi->lock);
	pi->outstanding = 0;

	pi->stale_after = 0;
	pi->last_finish = 0;

	path->pscontext = pi;

	spin_lock_irqsave(&s->lock, flags);
	list_add_tail(&pi->list, &s->valid_paths);
	s->valid_count++;
	spin_unlock_irqrestore(&s->lock, flags);

	return 0;
}

static void hst_fail_path(struct path_selector *ps, struct dm_path *path)
{
	struct selector *s = ps->context;
	struct path_info *pi = path->pscontext;
	unsigned long flags;

	spin_lock_irqsave(&s->lock, flags);
	list_move(&pi->list, &s->failed_paths);
	s->valid_count--;
	spin_unlock_irqrestore(&s->lock, flags);
}

static int hst_reinstate_path(struct path_selector *ps, struct dm_path *path)
{
	struct selector *s = ps->context;
	struct path_info *pi = path->pscontext;
	unsigned long flags;

	spin_lock_irqsave(&s->lock, flags);
	list_move_tail(&pi->list, &s->valid_paths);
	s->valid_count++;
	spin_unlock_irqrestore(&s->lock, flags);

	return 0;
}

static void hst_fill_compare(struct path_info *pi, u64 *hst,
			     u64 *out, u64 *stale)
{
	unsigned long flags;

	spin_lock_irqsave(&pi->lock, flags);
	*hst = pi->historical_service_time;
	*out = pi->outstanding;
	*stale = pi->stale_after;
	spin_unlock_irqrestore(&pi->lock, flags);
}

/*
 * Compare the estimated service time of 2 paths, pi1 and pi2,
 * for the incoming I/O.
 *
 * Returns:
 * < 0 : pi1 is better
 * 0   : no difference between pi1 and pi2
 * > 0 : pi2 is better
 *
 */
static long long hst_compare(struct path_info *pi1, struct path_info *pi2,
			     u64 time_now, struct path_selector *ps)
{
	struct selector *s = ps->context;
	u64 hst1, hst2;
	long long out1, out2, stale1, stale2;
	int pi2_better, over_threshold;

	hst_fill_compare(pi1, &hst1, &out1, &stale1);
	hst_fill_compare(pi2, &hst2, &out2, &stale2);

	/* Check here if the estimated latencies of the two paths are too
	 * similar. If this is the case, we skip extra calculation and just
	 * compare outstanding requests. In this case, any unloaded paths
	 * will be preferred.
	 */
	if (hst1 > hst2)
		over_threshold = hst1 > (s->threshold_multiplier * hst2);
	else
		over_threshold = hst2 > (s->threshold_multiplier * hst1);

	if (!over_threshold)
		return out1 - out2;

	/*
	 * If an unloaded path is stale, choose it. If both paths are unloaded,
	 * choose the path that is the most stale.
	 * (If one path is loaded, choose the other)
	 */
	if ((!out1 && stale1 < time_now) || (!out2 && stale2 < time_now) ||
	    (!out1 && !out2))
		return (!out2 * stale1) - (!out1 * stale2);

	/* Compare estimated service time. If outstanding is the same, we
	 * don't need to multiply.
	 */
	if (out1 == out2) {
		pi2_better = hst1 > hst2;
	} else {
		/* Potential overflow with out >= 1024 */
		if (unlikely(out1 >= HST_MAX_INFLIGHT ||
			     out2 >= HST_MAX_INFLIGHT)) {
			/* If over 1023 in-flights, we may overflow if hst
			 * is at max. (With this shift we still overflow at
			 * 1048576 in-flights, which is high enough).
			 */
			hst1 >>= HST_FIXED_SHIFT;
			hst2 >>= HST_FIXED_SHIFT;
		}
		pi2_better = (1 + out1) * hst1 > (1 + out2) * hst2;
	}

	/* In the case that the 'winner' is stale, limit to equal usage. */
	if (pi2_better) {
		if (stale2 < time_now)
			return out1 - out2;
		return 1;
	}
	if (stale1 < time_now)
		return out1 - out2;
	return -1;
}
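
/*
 * Worked example (illustrative values, neither path stale): hst1 = 1000
 * with out1 = 2 versus hst2 = 1500 with out2 = 0.  The estimates compared
 * are (1 + 2) * 1000 = 3000 and (1 + 0) * 1500 = 1500, so the nominally
 * slower but idle second path wins and hst_compare() returns 1.
 */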

static struct dm_path *hst_select_path(struct path_selector *ps,
				       size_t nr_bytes)
{
	struct selector *s = ps->context;
	struct path_info *pi = NULL, *best = NULL;
	u64 time_now = sched_clock();
	struct dm_path *ret = NULL;
	unsigned long flags;

	spin_lock_irqsave(&s->lock, flags);
	if (list_empty(&s->valid_paths))
		goto out;

	list_for_each_entry(pi, &s->valid_paths, list) {
		if (!best || (hst_compare(pi, best, time_now, ps) < 0))
			best = pi;
	}

	if (!best)
		goto out;

	/* Move last used path to end (least preferred in case of ties) */
	list_move_tail(&best->list, &s->valid_paths);

	ret = best->path;

out:
	spin_unlock_irqrestore(&s->lock, flags);
	return ret;
}

static int hst_start_io(struct path_selector *ps, struct dm_path *path,
			size_t nr_bytes)
{
	struct path_info *pi = path->pscontext;
	unsigned long flags;

	spin_lock_irqsave(&pi->lock, flags);
	pi->outstanding++;
	spin_unlock_irqrestore(&pi->lock, flags);

	return 0;
}

static u64 path_service_time(struct path_info *pi, u64 start_time)
{
	u64 sched_now = ktime_get_ns();

	/* if a previous disk request has finished after this IO was
	 * sent to the hardware, pretend the submission happened
	 * serially.
	 */
	if (time_after64(pi->last_finish, start_time))
		start_time = pi->last_finish;

	pi->last_finish = sched_now;
	if (time_before64(sched_now, start_time))
		return 0;

	return sched_now - start_time;
}
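
/*
 * Worked example of the serialization above (illustrative values, arbitrary
 * time units): an I/O is submitted at t = 100, a previously queued request
 * on the same path completes at t = 150, and this I/O completes at t = 180.
 * The recorded service time is 180 - 150 = 30 rather than 80, so queueing
 * delay behind the earlier request is not double-counted.
 */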

static int hst_end_io(struct path_selector *ps, struct dm_path *path,
		      size_t nr_bytes, u64 start_time)
{
	struct path_info *pi = path->pscontext;
	struct selector *s = ps->context;
	unsigned long flags;
	u64 st;

	spin_lock_irqsave(&pi->lock, flags);

	st = path_service_time(pi, start_time);
	pi->outstanding--;
	pi->historical_service_time =
		fixed_ema(pi->historical_service_time,
			  min(st * HST_FIXED_1, HST_FIXED_MAX),
			  hst_weight(ps, st));

	/*
	 * On request end, mark path as fresh. If a path hasn't
	 * finished any requests within the fresh period, the estimated
	 * service time is considered too optimistic and we limit the
	 * maximum requests on that path.
	 */
	pi->stale_after = pi->last_finish +
		(s->valid_count * (pi->historical_service_time >> HST_FIXED_SHIFT));

	spin_unlock_irqrestore(&pi->lock, flags);

	return 0;
}
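
/*
 * Illustration of the freshness window (hypothetical numbers): with four
 * valid paths and a historical service time of ~5 ms, stale_after lands
 * about 4 * 5 ms = 20 ms after the last completion, so a path that goes
 * quiet for longer than that is probed (if idle) or limited towards equal
 * usage (if loaded), as described in the header comment.
 */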

static struct path_selector_type hst_ps = {
	.name		= "historical-service-time",
	.module		= THIS_MODULE,
	.table_args	= 1,
	.info_args	= 3,
	.create		= hst_create,
	.destroy	= hst_destroy,
	.status		= hst_status,
	.add_path	= hst_add_path,
	.fail_path	= hst_fail_path,
	.reinstate_path	= hst_reinstate_path,
	.select_path	= hst_select_path,
	.start_io	= hst_start_io,
	.end_io		= hst_end_io,
};

static int __init dm_hst_init(void)
{
	int r = dm_register_path_selector(&hst_ps);

	if (r < 0)
		DMERR("register failed %d", r);

	DMINFO("version " HST_VERSION " loaded");

	return r;
}

static void __exit dm_hst_exit(void)
{
	int r = dm_unregister_path_selector(&hst_ps);

	if (r < 0)
		DMERR("unregister failed %d", r);
}

module_init(dm_hst_init);
module_exit(dm_hst_exit);

MODULE_DESCRIPTION(DM_NAME " measured service time oriented path selector");
MODULE_AUTHOR("Khazhismel Kumykov <khazhy@google.com>");
MODULE_LICENSE("GPL");