^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* -*- linux-c -*- ------------------------------------------------------- *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright 2002 H. Peter Anvin - All Rights Reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * ----------------------------------------------------------------------- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * raid6/sse1.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * SSE-1/MMXEXT implementation of RAID-6 syndrome functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * This is really an MMX implementation, but it requires SSE-1 or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * AMD MMXEXT for prefetch support and a few other features. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * support for nontemporal memory accesses is enough to make this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * worthwhile as a separate implementation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #ifdef CONFIG_X86_32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #include <linux/raid/pq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #include "x86.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
/*
 * Shared MMX constant table, defined in raid6/mmx.c.  x1d is presumably
 * the GF(2^8) reduction polynomial byte 0x1d replicated into all eight
 * bytes of the qword (it is loaded into %mm0 and used as the mask for
 * the multiply-by-2 step below) -- confirm against raid6/mmx.c.
 */
extern const struct raid6_mmx_constants {
	u64 x1d;
} raid6_mmx_constants;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) static int raid6_have_sse1_or_mmxext(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) /* Not really boot_cpu but "all_cpus" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) return boot_cpu_has(X86_FEATURE_MMX) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) (boot_cpu_has(X86_FEATURE_XMM) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) boot_cpu_has(X86_FEATURE_MMXEXT));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
/*
 * Plain SSE1 implementation
 *
 * Generates the RAID-6 P (XOR parity) and Q (Reed-Solomon syndrome)
 * pages 8 bytes at a time in MMX registers.  Q is evaluated by
 * Horner's rule: start with the highest data disk, then for each lower
 * disk multiply the running value by 2 in GF(2^8) and XOR in that
 * disk's data.
 *
 * disks: total number of disks including P and Q
 * bytes: page length; assumed to be a multiple of 8 -- TODO confirm
 * ptrs:  disk pages; [0..disks-3] data, [disks-2] P, [disks-1] Q
 *
 * Register usage per 8-byte column:
 *   mm0 = x1d reduction mask (constant)
 *   mm2 = P accumulator, mm4 = Q accumulator
 *   mm6 = prefetched data of the next-lower disk
 *   mm5 = scratch, kept zeroed between multiply steps
 */
static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	/* MMX clobbers the FPU state; must bracket with kernel_fpu_*() */
	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5"); /* Zero temp */

	for ( d = 0 ; d < bytes ; d += 8 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movq %mm2,%mm4");	/* Q[0] */
		asm volatile("movq %0,%%mm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/*
			 * GF(2^8) multiply-by-2 of Q (mm4):
			 * pcmpgtb against zeroed mm5 sets 0xff in every
			 * byte of mm5 whose top bit is set (signed
			 * compare 0 > byte), paddb doubles each byte
			 * (byte-wise shift left), and the masked x1d is
			 * XORed in as the polynomial reduction.
			 */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm5,%mm5"); /* re-zero scratch */
			asm volatile("pxor %mm6,%mm2"); /* P ^= data */
			asm volatile("pxor %mm6,%mm4"); /* Q ^= data */
			asm volatile("movq %0,%%mm6" : : "m" (dptr[z][d]));
		}
		/* Epilogue: one more multiply step, folding in disk 0
		   whose data was loaded into mm6 on the last iteration */
		asm volatile("pcmpgtb %mm4,%mm5");
		asm volatile("paddb %mm4,%mm4");
		asm volatile("pand %mm0,%mm5");
		asm volatile("pxor %mm5,%mm4");
		asm volatile("pxor %mm5,%mm5");
		asm volatile("pxor %mm6,%mm2");
		asm volatile("pxor %mm6,%mm4");

		/* Non-temporal stores: P/Q results bypass the cache */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
	}

	/* Make the movntq stores globally visible before returning */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
/*
 * Algorithm descriptor for the raid6 core (struct raid6_calls --
 * presumably declared in <linux/raid/pq.h>; field roles below inferred
 * from the initializer order, confirm against that header).
 */
const struct raid6_calls raid6_sse1x1 = {
	raid6_sse11_gen_syndrome,	/* gen_syndrome */
	NULL,			/* XOR not yet implemented */
	raid6_have_sse1_or_mmxext,	/* runtime availability check */
	"sse1x1",		/* algorithm name */
	1			/* Has cache hints */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
/*
 * Unrolled-by-2 SSE1 implementation
 *
 * Same P/Q computation as raid6_sse11_gen_syndrome, but processes two
 * independent 8-byte columns per loop iteration to extract more
 * instruction-level parallelism.
 *
 * Register usage: column 0 in mm2 (P) / mm4 (Q) with mm5 as scratch,
 * column 1 in mm3 (P) / mm6 (Q) with mm7 as scratch.  Note that mm5 and
 * mm7 do triple duty within one inner iteration: compare mask, then
 * freshly-loaded disk data, then re-zeroed for the next iteration --
 * the exact statement order below is load-bearing.
 *
 * disks: total number of disks including P and Q
 * bytes: page length; assumed to be a multiple of 16 -- TODO confirm
 * ptrs:  disk pages; [0..disks-3] data, [disks-2] P, [disks-1] Q
 */
static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	/* MMX clobbers the FPU state; must bracket with kernel_fpu_*() */
	kernel_fpu_begin();

	asm volatile("movq %0,%%mm0" : : "m" (raid6_mmx_constants.x1d));
	asm volatile("pxor %mm5,%mm5"); /* Zero temp */
	asm volatile("pxor %mm7,%mm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 16 bytes */
	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movq %0,%%mm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("movq %0,%%mm3" : : "m" (dptr[z0][d+8])); /* P[1] */
		asm volatile("movq %mm2,%mm4"); /* Q[0] */
		asm volatile("movq %mm3,%mm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/*
			 * GF(2^8) multiply-by-2 of both Q halves:
			 * signed compare against zero yields 0xff in
			 * bytes with the top bit set, paddb doubles
			 * each byte, and the masked x1d constant is
			 * XORed in as the polynomial reduction.
			 */
			asm volatile("pcmpgtb %mm4,%mm5");
			asm volatile("pcmpgtb %mm6,%mm7");
			asm volatile("paddb %mm4,%mm4");
			asm volatile("paddb %mm6,%mm6");
			asm volatile("pand %mm0,%mm5");
			asm volatile("pand %mm0,%mm7");
			asm volatile("pxor %mm5,%mm4");
			asm volatile("pxor %mm7,%mm6");
			/* Reuse mm5/mm7 to hold this disk's data ... */
			asm volatile("movq %0,%%mm5" : : "m" (dptr[z][d]));
			asm volatile("movq %0,%%mm7" : : "m" (dptr[z][d+8]));
			asm volatile("pxor %mm5,%mm2"); /* P[0] ^= data */
			asm volatile("pxor %mm7,%mm3"); /* P[1] ^= data */
			asm volatile("pxor %mm5,%mm4"); /* Q[0] ^= data */
			asm volatile("pxor %mm7,%mm6"); /* Q[1] ^= data */
			/* ... then re-zero them for the next compare */
			asm volatile("pxor %mm5,%mm5");
			asm volatile("pxor %mm7,%mm7");
		}
		/* Non-temporal stores: P/Q results bypass the cache */
		asm volatile("movntq %%mm2,%0" : "=m" (p[d]));
		asm volatile("movntq %%mm3,%0" : "=m" (p[d+8]));
		asm volatile("movntq %%mm4,%0" : "=m" (q[d]));
		asm volatile("movntq %%mm6,%0" : "=m" (q[d+8]));
	}

	/* Make the movntq stores globally visible before returning */
	asm volatile("sfence" : :: "memory");
	kernel_fpu_end();
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
/*
 * Algorithm descriptor for the raid6 core (struct raid6_calls --
 * presumably declared in <linux/raid/pq.h>; field roles below inferred
 * from the initializer order, confirm against that header).
 */
const struct raid6_calls raid6_sse1x2 = {
	raid6_sse12_gen_syndrome,	/* gen_syndrome */
	NULL,			/* XOR not yet implemented */
	raid6_have_sse1_or_mmxext,	/* runtime availability check */
	"sse1x2",		/* algorithm name */
	1			/* Has cache hints */
};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) #endif