^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Copyright (C) 2007-2009 NEC Corporation. All Rights Reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Module Author: Kiyoshi Ueda
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * This file is released under the GPL.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Throughput oriented path selector.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include "dm.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include "dm-path-selector.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/slab.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16)
/* Message prefix; presumably picked up by the DM logging macros (DMERR etc.). */
#define DM_MSG_PREFIX				"multipath service-time"

/* Default <repeat_count> used when the table line does not supply one. */
#define ST_MIN_IO				1

/* Largest <relative_throughput> value accepted by st_add_path(). */
#define ST_MAX_RELATIVE_THROUGHPUT		100

/*
 * Number of low bits dropped from an in-flight size before it is multiplied
 * by a relative throughput: 2^7 = 128 > ST_MAX_RELATIVE_THROUGHPUT.
 */
#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT	7

/* Sizes at or above this limit are shifted down before being multiplied. */
#define ST_MAX_INFLIGHT_SIZE \
	((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT)

/* Version string printed when the module is loaded. */
#define ST_VERSION				"0.3.0"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) struct selector {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) struct list_head valid_paths;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) struct list_head failed_paths;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) spinlock_t lock;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) struct path_info {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) struct list_head list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) struct dm_path *path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) unsigned repeat_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) unsigned relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) atomic_t in_flight_size; /* Total size of in-flight I/Os */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) static struct selector *alloc_selector(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) if (s) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) INIT_LIST_HEAD(&s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) INIT_LIST_HEAD(&s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) spin_lock_init(&s->lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) return s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) static int st_create(struct path_selector *ps, unsigned argc, char **argv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) struct selector *s = alloc_selector();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) if (!s)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) ps->context = s;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) static void free_paths(struct list_head *paths)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) struct path_info *pi, *next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) list_for_each_entry_safe(pi, next, paths, list) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) list_del(&pi->list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) kfree(pi);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) static void st_destroy(struct path_selector *ps)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) free_paths(&s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) free_paths(&s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) kfree(s);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) ps->context = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) static int st_status(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) status_type_t type, char *result, unsigned maxlen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) unsigned sz = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) struct path_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) if (!path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) DMEMIT("0 ");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) switch (type) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) case STATUSTYPE_INFO:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) DMEMIT("%d %u ", atomic_read(&pi->in_flight_size),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) pi->relative_throughput);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) case STATUSTYPE_TABLE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) DMEMIT("%u %u ", pi->repeat_count,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) pi->relative_throughput);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) return sz;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) static int st_add_path(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) int argc, char **argv, char **error)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) struct path_info *pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) unsigned repeat_count = ST_MIN_IO;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) unsigned relative_throughput = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) char dummy;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) * Arguments: [<repeat_count> [<relative_throughput>]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * <repeat_count>: The number of I/Os before switching path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) * If not given, default (ST_MIN_IO) is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) * <relative_throughput>: The relative throughput value of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * the path among all paths in the path-group.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) * If not given, minimum value '1' is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) * If '0' is given, the path isn't selected while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) * other paths having a positive value are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) * available.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) if (argc > 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) *error = "service-time ps: incorrect number of arguments";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) if (argc && (sscanf(argv[0], "%u%c", &repeat_count, &dummy) != 1)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) *error = "service-time ps: invalid repeat count";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) if (repeat_count > 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) DMWARN_LIMIT("repeat_count > 1 is deprecated, using 1 instead");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) repeat_count = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) if ((argc == 2) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) (sscanf(argv[1], "%u%c", &relative_throughput, &dummy) != 1 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) *error = "service-time ps: invalid relative_throughput value";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) /* allocate the path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) pi = kmalloc(sizeof(*pi), GFP_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) if (!pi) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) *error = "service-time ps: Error allocating path context";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) pi->path = path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) pi->repeat_count = repeat_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) pi->relative_throughput = relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) atomic_set(&pi->in_flight_size, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) path->pscontext = pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) list_add_tail(&pi->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) static void st_fail_path(struct path_selector *ps, struct dm_path *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) list_move(&pi->list, &s->failed_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) static int st_reinstate_path(struct path_selector *ps, struct dm_path *path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) list_move_tail(&pi->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) * Compare the estimated service time of 2 paths, pi1 and pi2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * for the incoming I/O.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) * Returns:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) * < 0 : pi1 is better
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) * 0 : no difference between pi1 and pi2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) * > 0 : pi2 is better
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) * Description:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) * Basically, the service time is estimated by:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) * ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) * To reduce the calculation, some optimizations are made.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) * (See comments inline)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) static int st_compare_load(struct path_info *pi1, struct path_info *pi2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) size_t incoming)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) size_t sz1, sz2, st1, st2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) sz1 = atomic_read(&pi1->in_flight_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) sz2 = atomic_read(&pi2->in_flight_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) * Case 1: Both have same throughput value. Choose less loaded path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) if (pi1->relative_throughput == pi2->relative_throughput)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) return sz1 - sz2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * Case 2a: Both have same load. Choose higher throughput path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) * Case 2b: One path has no throughput value. Choose the other one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) if (sz1 == sz2 ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) !pi1->relative_throughput || !pi2->relative_throughput)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) return pi2->relative_throughput - pi1->relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) * Case 3: Calculate service time. Choose faster path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) * Service time using pi1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) * st1 = (sz1 + incoming) / pi1->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) * Service time using pi2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) * st2 = (sz2 + incoming) / pi2->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) * To avoid the division, transform the expression to use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) * multiplication.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) * Because ->relative_throughput > 0 here, if st1 < st2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) * the expressions below are the same meaning:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * (sz1 + incoming) / pi1->relative_throughput <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * (sz2 + incoming) / pi2->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * (sz1 + incoming) * pi2->relative_throughput <
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * (sz2 + incoming) * pi1->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * So use the later one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) sz1 += incoming;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) sz2 += incoming;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) sz2 >= ST_MAX_INFLIGHT_SIZE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) * Size may be too big for multiplying pi->relative_throughput
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) * and overflow.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) * To avoid the overflow and mis-selection, shift down both.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) st1 = sz1 * pi2->relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) st2 = sz2 * pi1->relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) if (st1 != st2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) return st1 - st2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) * Case 4: Service time is equal. Choose higher throughput path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) return pi2->relative_throughput - pi1->relative_throughput;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) static struct dm_path *st_select_path(struct path_selector *ps, size_t nr_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) struct selector *s = ps->context;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) struct path_info *pi = NULL, *best = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) struct dm_path *ret = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) spin_lock_irqsave(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) if (list_empty(&s->valid_paths))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) list_for_each_entry(pi, &s->valid_paths, list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) if (!best || (st_compare_load(pi, best, nr_bytes) < 0))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) best = pi;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) if (!best)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) /* Move most recently used to least preferred to evenly balance. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) list_move_tail(&best->list, &s->valid_paths);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) ret = best->path;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) spin_unlock_irqrestore(&s->lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) static int st_start_io(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) size_t nr_bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) atomic_add(nr_bytes, &pi->in_flight_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) static int st_end_io(struct path_selector *ps, struct dm_path *path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) size_t nr_bytes, u64 start_time)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) struct path_info *pi = path->pscontext;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) atomic_sub(nr_bytes, &pi->in_flight_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) static struct path_selector_type st_ps = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) .name = "service-time",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) .module = THIS_MODULE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) .table_args = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) .info_args = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) .create = st_create,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) .destroy = st_destroy,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) .status = st_status,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) .add_path = st_add_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) .fail_path = st_fail_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) .reinstate_path = st_reinstate_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) .select_path = st_select_path,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) .start_io = st_start_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) .end_io = st_end_io,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) static int __init dm_st_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) int r = dm_register_path_selector(&st_ps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) DMERR("register failed %d", r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) DMINFO("version " ST_VERSION " loaded");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) return r;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) static void __exit dm_st_exit(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) int r = dm_unregister_path_selector(&st_ps);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) if (r < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) DMERR("unregister failed %d", r);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) module_init(dm_st_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) module_exit(dm_st_exit);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) MODULE_LICENSE("GPL");