// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 */

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

static int raid6_has_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_2data_recov_avx512(int disks, size_t bytes, int faila,
				     int failb, void **ptrs)
{
	u8 *p, *q, *dp, *dq;
	const u8 *pbmul;	/* P multiplier table for B data */
	const u8 *qmul;		/* Q multiplier table (for both) */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data pages.
	 * Use the dead data pages as temporary storage for
	 * delta p and delta q.
	 */

	dp = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-2] = dp;
	dq = (u8 *)ptrs[failb];
	ptrs[failb] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dp;
	ptrs[failb] = dq;
	ptrs[disks-2] = p;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
	qmul  = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
		raid6_gfexp[failb]]];
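
	/*
	 * dp and dq now hold P' and Q', the syndromes computed with the
	 * two failed blocks zeroed.  The loop below forms px = P ^ P'
	 * and qx = qmul[Q ^ Q'] and rebuilds the missing data as in the
	 * generic recov.c code:
	 *
	 *	Db = pbmul[px] ^ qx
	 *	Da = Db ^ px
	 *
	 * where, in GF(2^8), pbmul multiplies by 1/(g^(b-a) + 1) and
	 * qmul multiplies by 1/(g^a + g^b).  Each raid6_vgfmul entry
	 * stores its constant as a pair of 16-byte low/high-nibble
	 * lookup tables consumed by vpshufb.
	 */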

	kernel_fpu_begin();

	/* zmm7 = 64 bytes of 0x0f, the low-nibble mask */
	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));
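
	/*
	 * GF(2^8) constant multiplication is done with the usual
	 * nibble-split vpshufb trick: zmm7 masks out the low nibble of
	 * every byte, a vpsraw/vpandq pair extracts the high nibble,
	 * each nibble indexes one of the two broadcast 16-byte lookup
	 * tables, and the partial products are combined with vpxorq.
	 */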

	while (bytes) {
#ifdef CONFIG_X86_64
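		/*
		 * The 64-bit build also has zmm8-zmm15 to work with (they
		 * are not architecturally accessible in 32-bit mode), so
		 * each iteration handles two 64-byte vectors, i.e. 128
		 * bytes per block; the 32-bit path below does 64 bytes.
		 */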
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm9\n\t"
			     "vmovdqa64 %2, %%zmm0\n\t"
			     "vmovdqa64 %3, %%zmm8\n\t"
			     "vpxorq %4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %5, %%zmm9, %%zmm9\n\t"
			     "vpxorq %6, %%zmm0, %%zmm0\n\t"
			     "vpxorq %7, %%zmm8, %%zmm8"
			     :
			     : "m" (q[0]), "m" (q[64]), "m" (p[0]),
			       "m" (p[64]), "m" (dq[0]), "m" (dq[64]),
			       "m" (dp[0]), "m" (dp[64]));

		/*
		 * 1 = dq[0] ^ q[0]
		 * 9 = dq[64] ^ q[64]
		 * 0 = dp[0] ^ p[0]
		 * 8 = dp[64] ^ p[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpsraw $4, %%zmm9, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm9, %%zmm9\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm9, %%zmm4, %%zmm14\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm12, %%zmm5, %%zmm15\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm14, %%zmm15, %%zmm15\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/*
		 * 5 = qx[0]
		 * 15 = qx[64]
		 */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpsraw $4, %%zmm8, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm14\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm14, %%zmm4, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm13\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm12, %%zmm13, %%zmm13"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		/*
		 * 1 = pbmul[px[0]]
		 * 13 = pbmul[px[64]]
		 */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm15, %%zmm13, %%zmm13"
			     :
			     : );

		/*
		 * 1 = db = DQ
		 * 13 = db[64] = DQ[64]
		 */
		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm13, %1\n\t"
			     "vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vpxorq %%zmm13, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]));

		asm volatile("vmovdqa64 %%zmm0, %0\n\t"
			     "vmovdqa64 %%zmm8, %1"
			     :
			     : "m" (dp[0]), "m" (dp[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dp += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm1\n\t"
			     "vmovdqa64 %1, %%zmm0\n\t"
			     "vpxorq %2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %3, %%zmm0, %%zmm0"
			     :
			     : "m" (*q), "m" (*p), "m" (*dq), "m" (*dp));

		/* 1 = dq ^ q; 0 = dp ^ p */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm5"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		/*
		 * 1 = dq ^ q
		 * 3 = (dq ^ q) >> 4
		 */
		asm volatile("vpsraw $4, %%zmm1, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm1, %%zmm1\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpshufb %%zmm1, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm3, %%zmm5, %%zmm5\n\t"
			     "vpxorq %%zmm4, %%zmm5, %%zmm5"
			     :
			     : );

		/* 5 = qx */

		asm volatile("vbroadcasti64x2 %0, %%zmm4\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (pbmul[0]), "m" (pbmul[16]));

		asm volatile("vpsraw $4, %%zmm0, %%zmm2\n\t"
			     "vpandq %%zmm7, %%zmm0, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm2, %%zmm2\n\t"
			     "vpshufb %%zmm3, %%zmm4, %%zmm4\n\t"
			     "vpshufb %%zmm2, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm4, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = pbmul[px] */
		asm volatile("vpxorq %%zmm5, %%zmm1, %%zmm1\n\t"
			     /* 1 = db = DQ */
			     "vmovdqa64 %%zmm1, %0\n\t"
			     :
			     : "m" (dq[0]));

		asm volatile("vpxorq %%zmm1, %%zmm0, %%zmm0\n\t"
			     "vmovdqa64 %%zmm0, %0"
			     :
			     : "m" (dp[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dp += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

static void raid6_datap_recov_avx512(int disks, size_t bytes, int faila,
				     void **ptrs)
{
	u8 *p, *q, *dq;
	const u8 *qmul;		/* Q multiplier table */
	const u8 x0f = 0x0f;

	p = (u8 *)ptrs[disks-2];
	q = (u8 *)ptrs[disks-1];

	/*
	 * Compute syndrome with zero for the missing data page.
	 * Use the dead data page as temporary storage for delta q.
	 */

	dq = (u8 *)ptrs[faila];
	ptrs[faila] = (void *)raid6_empty_zero_page;
	ptrs[disks-1] = dq;

	raid6_call.gen_syndrome(disks, bytes, ptrs);

	/* Restore pointer table */
	ptrs[faila] = dq;
	ptrs[disks-1] = q;

	/* Now, pick the proper data tables */
	qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
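
	/*
	 * gen_syndrome() above regenerated P with the failed data block
	 * zeroed and wrote Q' into dq.  Since Q ^ Q' = g^a * Da in
	 * GF(2^8), the loop below recovers Da = qmul[Q ^ Q'] (qmul
	 * multiplies by g^(-a)), stores it in the failed block and fixes
	 * up P by xoring Da into it, as in the generic recov.c code.
	 */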

	kernel_fpu_begin();

	asm volatile("vpbroadcastb %0, %%zmm7" : : "m" (x0f));

	while (bytes) {
#ifdef CONFIG_X86_64
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vmovdqa64 %1, %%zmm8\n\t"
			     "vpxorq %2, %%zmm3, %%zmm3\n\t"
			     "vpxorq %3, %%zmm8, %%zmm8"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (q[0]),
			       "m" (q[64]));

		/*
		 * 3 = q[0] ^ dq[0]
		 * 8 = q[64] ^ dq[64]
		 */
		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vmovapd %%zmm0, %%zmm13\n\t"
			     "vbroadcasti64x2 %1, %%zmm1\n\t"
			     "vmovapd %%zmm1, %%zmm14"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpsraw $4, %%zmm8, %%zmm12\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm8, %%zmm8\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm12, %%zmm12\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm8, %%zmm13, %%zmm13\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpshufb %%zmm12, %%zmm14, %%zmm14\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm13, %%zmm14, %%zmm14"
			     :
			     : );

		/*
		 * 1 = qmul[q[0] ^ dq[0]]
		 * 14 = qmul[q[64] ^ dq[64]]
		 */
		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vmovdqa64 %1, %%zmm12\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2\n\t"
			     "vpxorq %%zmm14, %%zmm12, %%zmm12"
			     :
			     : "m" (p[0]), "m" (p[64]));

		/*
		 * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
		 * 12 = p[64] ^ qmul[q[64] ^ dq[64]]
		 */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm14, %1\n\t"
			     "vmovdqa64 %%zmm2, %2\n\t"
			     "vmovdqa64 %%zmm12, %3"
			     :
			     : "m" (dq[0]), "m" (dq[64]), "m" (p[0]),
			       "m" (p[64]));

		bytes -= 128;
		p += 128;
		q += 128;
		dq += 128;
#else
		asm volatile("vmovdqa64 %0, %%zmm3\n\t"
			     "vpxorq %1, %%zmm3, %%zmm3"
			     :
			     : "m" (dq[0]), "m" (q[0]));

		/* 3 = q ^ dq */

		asm volatile("vbroadcasti64x2 %0, %%zmm0\n\t"
			     "vbroadcasti64x2 %1, %%zmm1"
			     :
			     : "m" (qmul[0]), "m" (qmul[16]));

		asm volatile("vpsraw $4, %%zmm3, %%zmm6\n\t"
			     "vpandq %%zmm7, %%zmm3, %%zmm3\n\t"
			     "vpandq %%zmm7, %%zmm6, %%zmm6\n\t"
			     "vpshufb %%zmm3, %%zmm0, %%zmm0\n\t"
			     "vpshufb %%zmm6, %%zmm1, %%zmm1\n\t"
			     "vpxorq %%zmm0, %%zmm1, %%zmm1"
			     :
			     : );

		/* 1 = qmul[q ^ dq] */

		asm volatile("vmovdqa64 %0, %%zmm2\n\t"
			     "vpxorq %%zmm1, %%zmm2, %%zmm2"
			     :
			     : "m" (p[0]));

		/* 2 = p ^ qmul[q ^ dq] */

		asm volatile("vmovdqa64 %%zmm1, %0\n\t"
			     "vmovdqa64 %%zmm2, %1"
			     :
			     : "m" (dq[0]), "m" (p[0]));

		bytes -= 64;
		p += 64;
		q += 64;
		dq += 64;
#endif
	}

	kernel_fpu_end();
}

const struct raid6_recov_calls raid6_recov_avx512 = {
	.data2 = raid6_2data_recov_avx512,
	.datap = raid6_datap_recov_avx512,
	.valid = raid6_has_avx512,
#ifdef CONFIG_X86_64
	.name = "avx512x2",
#else
	.name = "avx512x1",
#endif
	.priority = 3,
};

#else
#warning "your version of binutils lacks AVX512 support"
#endif