Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2)  * Copyright (C) 2007-2009 NEC Corporation.  All Rights Reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  * Module Author: Kiyoshi Ueda
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * This file is released under the GPL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  * Throughput oriented path selector.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) #include "dm.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) #include "dm-path-selector.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) 
#define DM_MSG_PREFIX	"multipath service-time"

/* Default <repeat_count> used when the path arguments do not supply one. */
#define ST_MIN_IO	1

/* Largest <relative_throughput> value accepted for a path. */
#define ST_MAX_RELATIVE_THROUGHPUT	100

/*
 * In-flight sizes at or above ST_MAX_INFLIGHT_SIZE are shifted right by
 * ST_MAX_RELATIVE_THROUGHPUT_SHIFT before being multiplied by a relative
 * throughput, so that the product stays within size_t
 * (2^7 = 128 >= ST_MAX_RELATIVE_THROUGHPUT).
 */
#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT	7
#define ST_MAX_INFLIGHT_SIZE	((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT)

#define ST_VERSION	"0.3.0"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) struct selector {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) 	struct list_head valid_paths;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 	struct list_head failed_paths;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 	spinlock_t lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) struct path_info {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) 	struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 	struct dm_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) 	unsigned repeat_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) 	unsigned relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 	atomic_t in_flight_size;	/* Total size of in-flight I/Os */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) static struct selector *alloc_selector(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) 	struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 	if (s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 		INIT_LIST_HEAD(&s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 		INIT_LIST_HEAD(&s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 		spin_lock_init(&s->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 	return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) static int st_create(struct path_selector *ps, unsigned argc, char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 	struct selector *s = alloc_selector();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 	if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	ps->context = s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) static void free_paths(struct list_head *paths)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	struct path_info *pi, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	list_for_each_entry_safe(pi, next, paths, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 		list_del(&pi->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 		kfree(pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) static void st_destroy(struct path_selector *ps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	free_paths(&s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 	free_paths(&s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 	ps->context = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) static int st_status(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 		     status_type_t type, char *result, unsigned maxlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 	unsigned sz = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 	struct path_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 	if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 		DMEMIT("0 ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 		pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 		switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 		case STATUSTYPE_INFO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 			DMEMIT("%d %u ", atomic_read(&pi->in_flight_size),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 			       pi->relative_throughput);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 		case STATUSTYPE_TABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 			DMEMIT("%u %u ", pi->repeat_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 			       pi->relative_throughput);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 			break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 	return sz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) static int st_add_path(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 		       int argc, char **argv, char **error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 	struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	struct path_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	unsigned repeat_count = ST_MIN_IO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	unsigned relative_throughput = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 	char dummy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	 * Arguments: [<repeat_count> [<relative_throughput>]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	 * 	<repeat_count>: The number of I/Os before switching path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	 * 			If not given, default (ST_MIN_IO) is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	 * 	<relative_throughput>: The relative throughput value of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 	 *			the path among all paths in the path-group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 	 * 			The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	 *			If not given, minimum value '1' is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 	 *			If '0' is given, the path isn't selected while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 	 * 			other paths having a positive value are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 	 * 			available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 	if (argc > 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 		*error = "service-time ps: incorrect number of arguments";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 	if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 		*error = "service-time ps: invalid repeat count";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 	if (repeat_count > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 		DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 		repeat_count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 	if ((argc == 2) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	    (sscanf(argv[1], "%u%c", &relative_throughput, &dummy) != 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 	     relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 		*error = "service-time ps: invalid relative_throughput value";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 		return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 	/* allocate the path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 	pi = kmalloc(sizeof(*pi), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 	if (!pi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 		*error = "service-time ps: Error allocating path context";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 	pi->path = path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 	pi->repeat_count = repeat_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 	pi->relative_throughput = relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 	atomic_set(&pi->in_flight_size, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 	path->pscontext = pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 	list_add_tail(&pi->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 	spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) static void st_fail_path(struct path_selector *ps, struct dm_path *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 	struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 	spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 	list_move(&pi->list, &s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 	spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 	struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 	struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 	list_move_tail(&pi->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 	spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)  * Compare the estimated service time of 2 paths, pi1 and pi2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199)  * for the incoming I/O.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)  * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)  * < 0 : pi1 is better
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)  * 0   : no difference between pi1 and pi2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)  * > 0 : pi2 is better
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)  * Description:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)  * Basically, the service time is estimated by:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)  *     ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)  * To reduce the calculation, some optimizations are made.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)  * (See comments inline)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 			   size_t incoming)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 	size_t sz1, sz2, st1, st2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 	sz1 = atomic_read(&pi1->in_flight_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 	sz2 = atomic_read(&pi2->in_flight_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	 * Case 1: Both have same throughput value. Choose less loaded path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 	if (pi1->relative_throughput == pi2->relative_throughput)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 		return sz1 - sz2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 	 * Case 2a: Both have same load. Choose higher throughput path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 	 * Case 2b: One path has no throughput value. Choose the other one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	if (sz1 == sz2 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 	    !pi1->relative_throughput || !pi2->relative_throughput)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 		return pi2->relative_throughput - pi1->relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 	 * Case 3: Calculate service time. Choose faster path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 	 *         Service time using pi1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 	 *             st1 = (sz1 + incoming) / pi1->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 	 *         Service time using pi2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 	 *             st2 = (sz2 + incoming) / pi2->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 	 *         To avoid the division, transform the expression to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 	 *         multiplication.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	 *         Because ->relative_throughput > 0 here, if st1 < st2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 	 *         the expressions below are the same meaning:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	 *             (sz1 + incoming) / pi1->relative_throughput <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 	 *                 (sz2 + incoming) / pi2->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	 *             (sz1 + incoming) * pi2->relative_throughput <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 	 *                 (sz2 + incoming) * pi1->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 	 *         So use the later one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 	sz1 += incoming;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	sz2 += incoming;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 	if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 		     sz2 >= ST_MAX_INFLIGHT_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 		 * Size may be too big for multiplying pi->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 		 * and overflow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 		 * To avoid the overflow and mis-selection, shift down both.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 		sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 		sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 	st1 = sz1 * pi2->relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 	st2 = sz2 * pi1->relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 	if (st1 != st2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 		return st1 - st2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 	 * Case 4: Service time is equal. Choose higher throughput path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 	return pi2->relative_throughput - pi1->relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) static struct dm_path *st_select_path(struct path_selector *ps, size_t nr_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 	struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 	struct path_info *pi = NULL, *best = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 	struct dm_path *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 	unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 	spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 	if (list_empty(&s->valid_paths))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 	list_for_each_entry(pi, &s->valid_paths, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 		if (!best || (st_compare_load(pi, best, nr_bytes) < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 			best = pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) 	if (!best)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 		goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 	/* Move most recently used to least preferred to evenly balance. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 	list_move_tail(&best->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 	ret = best->path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 	spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) static int st_start_io(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 		       size_t nr_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 	struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 	atomic_add(nr_bytes, &pi->in_flight_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) static int st_end_io(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 		     size_t nr_bytes, u64 start_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 	struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 	atomic_sub(nr_bytes, &pi->in_flight_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) static struct path_selector_type st_ps = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 	.name		= "service-time",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 	.module		= THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 	.table_args	= 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 	.info_args	= 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 	.create		= st_create,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 	.destroy	= st_destroy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 	.status		= st_status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) 	.add_path	= st_add_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 	.fail_path	= st_fail_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 	.reinstate_path	= st_reinstate_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 	.select_path	= st_select_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 	.start_io	= st_start_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 	.end_io		= st_end_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) static int __init dm_st_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 	int r = dm_register_path_selector(&st_ps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 	if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 		DMERR("register failed %d", r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 	DMINFO("version " ST_VERSION " loaded");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) 	return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) static void __exit dm_st_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 	int r = dm_unregister_path_selector(&st_ps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 	if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) 		DMERR("unregister failed %d", r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) module_init(dm_st_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) module_exit(dm_st_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) MODULE_LICENSE("GPL");