// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Intel Corporation
 * Author: Jim Kukunas <james.t.kukunas@linux.intel.com>
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
#include <linux/raid/pq.h>
#include "x86.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) static int raid6_has_avx2(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) return boot_cpu_has(X86_FEATURE_AVX2) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) boot_cpu_has(X86_FEATURE_AVX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) static void raid6_2data_recov_avx2(int disks, size_t bytes, int faila,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) int failb, void **ptrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) u8 *p, *q, *dp, *dq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) const u8 *pbmul; /* P multiplier table for B data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) const u8 *qmul; /* Q multiplier table (for both) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) const u8 x0f = 0x0f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) p = (u8 *)ptrs[disks-2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) q = (u8 *)ptrs[disks-1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) /* Compute syndrome with zero for the missing data pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) Use the dead data pages as temporary storage for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) delta p and delta q */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) dp = (u8 *)ptrs[faila];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) ptrs[faila] = (void *)raid6_empty_zero_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) ptrs[disks-2] = dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) dq = (u8 *)ptrs[failb];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) ptrs[failb] = (void *)raid6_empty_zero_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) ptrs[disks-1] = dq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) raid6_call.gen_syndrome(disks, bytes, ptrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) /* Restore pointer table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) ptrs[faila] = dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) ptrs[failb] = dq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) ptrs[disks-2] = p;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) ptrs[disks-1] = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) /* Now, pick the proper data tables */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) raid6_gfexp[failb]]];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) /* ymm0 = x0f[16] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) while (bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #ifdef CONFIG_X86_64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) asm volatile("vmovdqa %0, %%ymm1" : : "m" (q[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) asm volatile("vmovdqa %0, %%ymm9" : : "m" (q[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) asm volatile("vmovdqa %0, %%ymm0" : : "m" (p[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) asm volatile("vmovdqa %0, %%ymm8" : : "m" (p[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (dq[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) asm volatile("vpxor %0, %%ymm9, %%ymm9" : : "m" (dq[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (dp[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (dp[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) * 1 = dq[0] ^ q[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) * 9 = dq[32] ^ q[32]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * 0 = dp[0] ^ p[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * 8 = dp[32] ^ p[32]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) asm volatile("vpsraw $4, %ymm1, %ymm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) asm volatile("vpsraw $4, %ymm9, %ymm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) asm volatile("vpand %ymm7, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) asm volatile("vpand %ymm7, %ymm9, %ymm9");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) asm volatile("vpand %ymm7, %ymm3, %ymm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) asm volatile("vpand %ymm7, %ymm12, %ymm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) asm volatile("vpshufb %ymm9, %ymm4, %ymm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) asm volatile("vpshufb %ymm12, %ymm5, %ymm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) asm volatile("vpxor %ymm14, %ymm15, %ymm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) asm volatile("vpxor %ymm4, %ymm5, %ymm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * 5 = qx[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) * 15 = qx[32]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) asm volatile("vpsraw $4, %ymm0, %ymm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) asm volatile("vpsraw $4, %ymm8, %ymm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) asm volatile("vpand %ymm7, %ymm0, %ymm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) asm volatile("vpand %ymm7, %ymm8, %ymm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) asm volatile("vpand %ymm7, %ymm2, %ymm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) asm volatile("vpand %ymm7, %ymm6, %ymm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) asm volatile("vpshufb %ymm14, %ymm4, %ymm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) asm volatile("vpshufb %ymm6, %ymm1, %ymm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) asm volatile("vpxor %ymm4, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) asm volatile("vpxor %ymm12, %ymm13, %ymm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) * 1 = pbmul[px[0]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) * 13 = pbmul[px[32]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) asm volatile("vpxor %ymm5, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) asm volatile("vpxor %ymm15, %ymm13, %ymm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * 1 = db = DQ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * 13 = db[32] = DQ[32]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) asm volatile("vmovdqa %%ymm13,%0" : "=m" (dq[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) asm volatile("vpxor %ymm1, %ymm0, %ymm0");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) asm volatile("vpxor %ymm13, %ymm8, %ymm8");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) asm volatile("vmovdqa %%ymm8, %0" : "=m" (dp[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) bytes -= 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) p += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) q += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) dp += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) dq += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) asm volatile("vmovdqa %0, %%ymm1" : : "m" (*q));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) asm volatile("vmovdqa %0, %%ymm0" : : "m" (*p));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) asm volatile("vpxor %0, %%ymm1, %%ymm1" : : "m" (*dq));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) asm volatile("vpxor %0, %%ymm0, %%ymm0" : : "m" (*dp));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) /* 1 = dq ^ q; 0 = dp ^ p */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (qmul[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) asm volatile("vbroadcasti128 %0, %%ymm5" : : "m" (qmul[16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) * 1 = dq ^ q
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) * 3 = dq ^ p >> 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) asm volatile("vpsraw $4, %ymm1, %ymm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) asm volatile("vpand %ymm7, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) asm volatile("vpand %ymm7, %ymm3, %ymm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) asm volatile("vpshufb %ymm1, %ymm4, %ymm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) asm volatile("vpshufb %ymm3, %ymm5, %ymm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) asm volatile("vpxor %ymm4, %ymm5, %ymm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) /* 5 = qx */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) asm volatile("vbroadcasti128 %0, %%ymm4" : : "m" (pbmul[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (pbmul[16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) asm volatile("vpsraw $4, %ymm0, %ymm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) asm volatile("vpand %ymm7, %ymm0, %ymm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) asm volatile("vpand %ymm7, %ymm2, %ymm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) asm volatile("vpshufb %ymm3, %ymm4, %ymm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) asm volatile("vpshufb %ymm2, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) asm volatile("vpxor %ymm4, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) /* 1 = pbmul[px] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) asm volatile("vpxor %ymm5, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) /* 1 = db = DQ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) asm volatile("vpxor %ymm1, %ymm0, %ymm0");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) asm volatile("vmovdqa %%ymm0, %0" : "=m" (dp[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) bytes -= 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) p += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) q += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) dp += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) dq += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) static void raid6_datap_recov_avx2(int disks, size_t bytes, int faila,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) void **ptrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) u8 *p, *q, *dq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) const u8 *qmul; /* Q multiplier table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) const u8 x0f = 0x0f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) p = (u8 *)ptrs[disks-2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) q = (u8 *)ptrs[disks-1];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) /* Compute syndrome with zero for the missing data page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) Use the dead data page as temporary storage for delta q */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) dq = (u8 *)ptrs[faila];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) ptrs[faila] = (void *)raid6_empty_zero_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) ptrs[disks-1] = dq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) raid6_call.gen_syndrome(disks, bytes, ptrs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) /* Restore pointer table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) ptrs[faila] = dq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) ptrs[disks-1] = q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) /* Now, pick the proper data tables */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) asm volatile("vpbroadcastb %0, %%ymm7" : : "m" (x0f));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) while (bytes) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) #ifdef CONFIG_X86_64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) asm volatile("vmovdqa %0, %%ymm8" : : "m" (dq[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) asm volatile("vpxor %0, %%ymm8, %%ymm8" : : "m" (q[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) * 3 = q[0] ^ dq[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * 8 = q[32] ^ dq[32]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) asm volatile("vmovapd %ymm0, %ymm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) asm volatile("vmovapd %ymm1, %ymm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) asm volatile("vpsraw $4, %ymm3, %ymm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) asm volatile("vpsraw $4, %ymm8, %ymm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) asm volatile("vpand %ymm7, %ymm3, %ymm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) asm volatile("vpand %ymm7, %ymm8, %ymm8");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) asm volatile("vpand %ymm7, %ymm6, %ymm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) asm volatile("vpand %ymm7, %ymm12, %ymm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) asm volatile("vpshufb %ymm8, %ymm13, %ymm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) asm volatile("vpshufb %ymm12, %ymm14, %ymm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) asm volatile("vpxor %ymm0, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) asm volatile("vpxor %ymm13, %ymm14, %ymm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * 1 = qmul[q[0] ^ dq[0]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) * 14 = qmul[q[32] ^ dq[32]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) asm volatile("vmovdqa %0, %%ymm12" : : "m" (p[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) asm volatile("vpxor %ymm1, %ymm2, %ymm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) asm volatile("vpxor %ymm14, %ymm12, %ymm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) * 2 = p[0] ^ qmul[q[0] ^ dq[0]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) * 12 = p[32] ^ qmul[q[32] ^ dq[32]]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) asm volatile("vmovdqa %%ymm14, %0" : "=m" (dq[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) asm volatile("vmovdqa %%ymm12,%0" : "=m" (p[32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) bytes -= 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) p += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) q += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) dq += 64;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) asm volatile("vmovdqa %0, %%ymm3" : : "m" (dq[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) asm volatile("vpxor %0, %%ymm3, %%ymm3" : : "m" (q[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) /* 3 = q ^ dq */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) asm volatile("vbroadcasti128 %0, %%ymm0" : : "m" (qmul[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) asm volatile("vbroadcasti128 %0, %%ymm1" : : "m" (qmul[16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) asm volatile("vpsraw $4, %ymm3, %ymm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) asm volatile("vpand %ymm7, %ymm3, %ymm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) asm volatile("vpand %ymm7, %ymm6, %ymm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) asm volatile("vpshufb %ymm3, %ymm0, %ymm0");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) asm volatile("vpshufb %ymm6, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) asm volatile("vpxor %ymm0, %ymm1, %ymm1");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) /* 1 = qmul[q ^ dq] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) asm volatile("vmovdqa %0, %%ymm2" : : "m" (p[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) asm volatile("vpxor %ymm1, %ymm2, %ymm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) /* 2 = p ^ qmul[q ^ dq] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) asm volatile("vmovdqa %%ymm1, %0" : "=m" (dq[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) asm volatile("vmovdqa %%ymm2, %0" : "=m" (p[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) bytes -= 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) p += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) q += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) dq += 32;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) const struct raid6_recov_calls raid6_recov_avx2 = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) .data2 = raid6_2data_recov_avx2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) .datap = raid6_datap_recov_avx2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) .valid = raid6_has_avx2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) #ifdef CONFIG_X86_64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) .name = "avx2x2",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) .name = "avx2x1",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) .priority = 2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) };