^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * xor.c : Multiple Devices driver for Linux
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 1996, 1997, 1998, 1999, 2000,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Ingo Molnar, Matti Aarnio, Jakub Jelinek, Richard Henderson.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Dispatch optimized RAID-5 checksumming functions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #define BH_TRACE 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <linux/gfp.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/raid/xor.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #include <linux/jiffies.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/preempt.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <asm/xor.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
/*
 * Fallback for XOR_SELECT_TEMPLATE when none of the headers included above
 * (presumably <asm/xor.h>) defines it: the argument is handed back unchanged.
 * Both users in this file pass NULL and treat a non-NULL result as a template
 * that is to be used directly, skipping registration/benchmarking.
 */
#ifndef XOR_SELECT_TEMPLATE
#define XOR_SELECT_TEMPLATE(x) (x)
#endif

/* The xor routines to use; xor_blocks() dispatches through this pointer. */
static struct xor_block_template *active_template;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) unsigned long *p1, *p2, *p3, *p4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) p1 = (unsigned long *) srcs[0];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) if (src_count == 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) active_template->do_2(bytes, dest, p1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) p2 = (unsigned long *) srcs[1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) if (src_count == 2) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) active_template->do_3(bytes, dest, p1, p2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) p3 = (unsigned long *) srcs[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) if (src_count == 3) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) active_template->do_4(bytes, dest, p1, p2, p3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) p4 = (unsigned long *) srcs[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) active_template->do_5(bytes, dest, p1, p2, p3, p4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) EXPORT_SYMBOL(xor_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) /* Set of all registered templates. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) static struct xor_block_template *__initdata template_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #ifndef MODULE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) static void __init do_xor_register(struct xor_block_template *tmpl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) tmpl->next = template_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) template_list = tmpl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) static int __init register_xor_blocks(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) active_template = XOR_SELECT_TEMPLATE(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) if (!active_template) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) #define xor_speed do_xor_register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) // register all the templates and pick the first as the default
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) XOR_TRY_TEMPLATES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #undef xor_speed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) active_template = template_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) #define BENCH_SIZE 4096
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) #define REPS 800U
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) static void __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) do_xor_speed(struct xor_block_template *tmpl, void *b1, void *b2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) int speed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) int i, j;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) ktime_t min, start, diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) tmpl->next = template_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) template_list = tmpl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) preempt_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) min = (ktime_t)S64_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) for (i = 0; i < 3; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) start = ktime_get();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) for (j = 0; j < REPS; j++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) mb(); /* prevent loop optimzation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) tmpl->do_2(BENCH_SIZE, b1, b2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) mb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) diff = ktime_sub(ktime_get(), start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) if (diff < min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) min = diff;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) preempt_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) // bytes/ns == GB/s, multiply by 1000 to get MB/s [not MiB/s]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) if (!min)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) min = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) speed = (1000 * REPS * BENCH_SIZE) / (unsigned int)ktime_to_ns(min);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) tmpl->speed = speed;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) pr_info(" %-16s: %5d MB/sec\n", tmpl->name, speed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) static int __init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) calibrate_xor_blocks(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) void *b1, *b2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) struct xor_block_template *f, *fastest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) fastest = XOR_SELECT_TEMPLATE(NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) if (fastest) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) printk(KERN_INFO "xor: automatically using best "
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) "checksumming function %-10s\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) fastest->name);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) b1 = (void *) __get_free_pages(GFP_KERNEL, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) if (!b1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) printk(KERN_WARNING "xor: Yikes! No memory available.\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) * If this arch/cpu has a short-circuited selection, don't loop through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) * all the possible functions, just test the best one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) #define xor_speed(templ) do_xor_speed((templ), b1, b2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) printk(KERN_INFO "xor: measuring software checksum speed\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) template_list = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) XOR_TRY_TEMPLATES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) fastest = template_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) for (f = fastest; f; f = f->next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) if (f->speed > fastest->speed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) fastest = f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) pr_info("xor: using function: %s (%d MB/sec)\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) fastest->name, fastest->speed);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) #undef xor_speed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) free_pages((unsigned long)b1, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) active_template = fastest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) static __exit void xor_exit(void) { }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) MODULE_LICENSE("GPL");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) #ifndef MODULE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) /* when built-in xor.o must initialize before drivers/md/md.o */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) core_initcall(register_xor_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) module_init(calibrate_xor_blocks);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) module_exit(xor_exit);