// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- --------------------------------------------------------
 *
 * Copyright (C) 2016 Intel Corporation
 *
 * Author: Gayatri Kammela <gayatri.kammela@intel.com>
 * Author: Megha Dey <megha.dey@linux.intel.com>
 *
 * Based on avx2.c: Copyright 2012 Yuanhan Liu All Rights Reserved
 * Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * -----------------------------------------------------------------------
 */

/*
 * AVX512 implementation of RAID-6 syndrome functions
 */
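
/*
 * Background: for data disks D_0 .. D_z0, the two syndromes are
 *
 *	P = D_0 + D_1 + ... + D_z0                 (bytewise XOR)
 *	Q = g^0*D_0 + g^1*D_1 + ... + g^z0*D_z0    (GF(2^8), g = 2)
 *
 * Horner's rule turns Q into "multiply the accumulator by 2, then XOR
 * in the next disk", repeated from the highest data disk down to disk
 * 0.  A scalar sketch of the multiply-by-2 step (illustrative only,
 * not a helper used by this file):
 *
 *	static inline u8 gf_mul2(u8 v)
 *	{
 *		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
 *	}
 *
 * The vector code below gets the same effect per byte: vpcmpgtb
 * against zero extracts each byte's top bit into a mask register,
 * vpmovm2b widens the mask to 0x00/0xff bytes, vpaddb shifts each byte
 * left by one, vpandq selects 0x1d where the top bit was set, and
 * vpxorq folds the reduction back in.
 */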

#ifdef CONFIG_AS_AVX512

#include <linux/raid/pq.h>
#include "x86.h"

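/*
 * 0x1d is the low byte of the RAID-6 generator polynomial
 * x^8 + x^4 + x^3 + x^2 + 1 (0x11d).  This 64-byte-aligned block of
 * replicated 0x1d bytes is loaded once into zmm0 and used by every
 * reduction step.
 */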
static const struct raid6_avx512_constants {
	u64 x1d[8];
} raid6_avx512_constants __aligned(512/8) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

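/*
 * The byte-granular zmm compares and vpmovm2b below require AVX512BW;
 * the other flags are checked as well so these routines only run where
 * the full AVX-512 feature subset they were written against is present.
 */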
static int raid6_have_avx512(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) &&
		boot_cpu_has(X86_FEATURE_AVX) &&
		boot_cpu_has(X86_FEATURE_AVX512F) &&
		boot_cpu_has(X86_FEATURE_AVX512BW) &&
		boot_cpu_has(X86_FEATURE_AVX512VL) &&
		boot_cpu_has(X86_FEATURE_AVX512DQ);
}

static void raid6_avx5121_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1"	/* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"	/* P[0] */
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %%zmm2,%%zmm4\n\t"	/* Q[0] */
			     "vmovdqa64 %1,%%zmm6"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0-1][d]));
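		/*
		 * Per remaining disk: multiply the Q accumulator (zmm4)
		 * by 2 in GF(2^8) with the compare/widen/shift/mask/XOR
		 * sequence described at the top of the file, then XOR
		 * the disk's data into both P (zmm2) and Q.
		 */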
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm6"
				     :
				     : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
			     "vpmovm2b %%k1,%%zmm5\n\t"
			     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
			     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
			     "vpxorq %%zmm6,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm4,%1\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4"
			     :
			     : "m" (p[d]), "m" (q[d]));
	}

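	/* Flush the weakly-ordered non-temporal stores before returning */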
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

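/*
 * Update an existing P/Q pair for a write that only touched disks
 * start..stop.  Iterating z from 'stop' down to 0 gives disk z its g^z
 * factor: disks above 'stop' contribute nothing to the delta (the
 * "right side" optimization), and disks below 'start' only require
 * doubling the Q accumulator, with no data access (the "left side"
 * optimization).
 */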
static void raid6_avx5121_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm2\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2"
			     :
			     : "m" (dptr[z0][d]), "m" (p[d]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4"
				     :
				     : "m" (dptr[z][d]));
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4"
				     :
				     : );
		}
		asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
			     /* Don't use movntdq for r/w memory area < cache line */
			     "vmovdqa64 %%zmm4,%0\n\t"
			     "vmovdqa64 %%zmm2,%1"
			     :
			     : "m" (q[d]), "m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x1 = {
	raid6_avx5121_gen_syndrome,
	raid6_avx5121_xor_syndrome,
	raid6_have_avx512,
	"avx512x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 AVX512 implementation
 */
static void raid6_avx5122_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1"	/* Zero temp */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));

	/* We uniformly assume a single prefetch covers at least 64 bytes */
	for (d = 0; d < bytes; d += 128) {
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     "vmovdqa64 %0,%%zmm2\n\t"	/* P[0] */
			     "vmovdqa64 %1,%%zmm3\n\t"	/* P[1] */
			     "vmovdqa64 %%zmm2,%%zmm4\n\t"	/* Q[0] */
			     "vmovdqa64 %%zmm3,%%zmm6"	/* Q[1] */
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]));
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm4,%2\n\t"
			     "vmovntdq %%zmm6,%3"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (q[d]),
			       "m" (q[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx5122_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm6\n\t"
			     "vmovdqa64 %2,%%zmm2\n\t"
			     "vmovdqa64 %3,%%zmm3\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm3,%%zmm3"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
			       "m" (p[d]), "m" (p[d+64]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]));
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6"
				     :
				     : );
		}
		asm volatile("vpxorq %0,%%zmm4,%%zmm4\n\t"
			     "vpxorq %1,%%zmm6,%%zmm6\n\t"
			     /* Don't use movntdq for r/w
			      * memory area < cache line
			      */
			     "vmovdqa64 %%zmm4,%0\n\t"
			     "vmovdqa64 %%zmm6,%1\n\t"
			     "vmovdqa64 %%zmm2,%2\n\t"
			     "vmovdqa64 %%zmm3,%3"
			     :
			     : "m" (q[d]), "m" (q[d+64]), "m" (p[d]),
			       "m" (p[d+64]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x2 = {
	raid6_avx5122_gen_syndrome,
	raid6_avx5122_xor_syndrome,
	raid6_have_avx512,
	"avx512x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 AVX512 implementation
 */
static void raid6_avx5124_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0\n\t"
		     "vpxorq %%zmm1,%%zmm1,%%zmm1\n\t"	/* Zero temp */
		     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"	/* P[0] */
		     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"	/* P[1] */
		     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"	/* Q[0] */
		     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"	/* Q[1] */
		     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"	/* P[2] */
		     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"	/* P[3] */
		     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"	/* Q[2] */
		     "vpxorq %%zmm14,%%zmm14,%%zmm14"	/* Q[3] */
		     :
		     : "m" (raid6_avx512_constants.x1d[0]));
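
	/*
	 * Unlike the x1/x2 variants there is no initial-load step: the
	 * P/Q accumulators start out zeroed, every data disk (z0 down
	 * to 0) is folded in, and each non-temporal store below is
	 * paired with a re-zero so the next iteration of the d loop
	 * starts from clean registers.
	 */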
	for (d = 0; d < bytes; d += 256) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0\n\t"
				     "prefetchnta %1\n\t"
				     "prefetchnta %2\n\t"
				     "prefetchnta %3\n\t"
				     "vpcmpgtb %%zmm4,%%zmm1,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm1,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm1,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm1,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vmovdqa64 %2,%%zmm13\n\t"
				     "vmovdqa64 %3,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
				     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
				       "m" (dptr[z][d+128]), "m" (dptr[z][d+192]));
		}
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vpxorq %%zmm2,%%zmm2,%%zmm2\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vpxorq %%zmm3,%%zmm3,%%zmm3\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vpxorq %%zmm10,%%zmm10,%%zmm10\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %%zmm11,%%zmm11,%%zmm11\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vpxorq %%zmm4,%%zmm4,%%zmm4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vpxorq %%zmm6,%%zmm6,%%zmm6\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vpxorq %%zmm12,%%zmm12,%%zmm12\n\t"
			     "vmovntdq %%zmm14,%7\n\t"
			     "vpxorq %%zmm14,%%zmm14,%%zmm14"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx5124_xor_syndrome(int disks, int start, int stop,
				       size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa64 %0,%%zmm0"
		     : : "m" (raid6_avx512_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 256) {
		asm volatile("vmovdqa64 %0,%%zmm4\n\t"
			     "vmovdqa64 %1,%%zmm6\n\t"
			     "vmovdqa64 %2,%%zmm12\n\t"
			     "vmovdqa64 %3,%%zmm14\n\t"
			     "vmovdqa64 %4,%%zmm2\n\t"
			     "vmovdqa64 %5,%%zmm3\n\t"
			     "vmovdqa64 %6,%%zmm10\n\t"
			     "vmovdqa64 %7,%%zmm11\n\t"
			     "vpxorq %%zmm4,%%zmm2,%%zmm2\n\t"
			     "vpxorq %%zmm6,%%zmm3,%%zmm3\n\t"
			     "vpxorq %%zmm12,%%zmm10,%%zmm10\n\t"
			     "vpxorq %%zmm14,%%zmm11,%%zmm11"
			     :
			     : "m" (dptr[z0][d]), "m" (dptr[z0][d+64]),
			       "m" (dptr[z0][d+128]), "m" (dptr[z0][d+192]),
			       "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]));
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
				     "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
				     "prefetchnta %0\n\t"
				     "prefetchnta %2\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14\n\t"
				     "vmovdqa64 %0,%%zmm5\n\t"
				     "vmovdqa64 %1,%%zmm7\n\t"
				     "vmovdqa64 %2,%%zmm13\n\t"
				     "vmovdqa64 %3,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm2,%%zmm2\n\t"
				     "vpxorq %%zmm7,%%zmm3,%%zmm3\n\t"
				     "vpxorq %%zmm13,%%zmm10,%%zmm10\n\t"
				     "vpxorq %%zmm15,%%zmm11,%%zmm11\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : "m" (dptr[z][d]), "m" (dptr[z][d+64]),
				       "m" (dptr[z][d+128]),
				       "m" (dptr[z][d+192]));
		}
		asm volatile("prefetchnta %0\n\t"
			     "prefetchnta %1\n\t"
			     :
			     : "m" (q[d]), "m" (q[d+128]));
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxorq %%zmm5,%%zmm5,%%zmm5\n\t"
				     "vpxorq %%zmm7,%%zmm7,%%zmm7\n\t"
				     "vpxorq %%zmm13,%%zmm13,%%zmm13\n\t"
				     "vpxorq %%zmm15,%%zmm15,%%zmm15\n\t"
				     "vpcmpgtb %%zmm4,%%zmm5,%%k1\n\t"
				     "vpcmpgtb %%zmm6,%%zmm7,%%k2\n\t"
				     "vpcmpgtb %%zmm12,%%zmm13,%%k3\n\t"
				     "vpcmpgtb %%zmm14,%%zmm15,%%k4\n\t"
				     "vpmovm2b %%k1,%%zmm5\n\t"
				     "vpmovm2b %%k2,%%zmm7\n\t"
				     "vpmovm2b %%k3,%%zmm13\n\t"
				     "vpmovm2b %%k4,%%zmm15\n\t"
				     "vpaddb %%zmm4,%%zmm4,%%zmm4\n\t"
				     "vpaddb %%zmm6,%%zmm6,%%zmm6\n\t"
				     "vpaddb %%zmm12,%%zmm12,%%zmm12\n\t"
				     "vpaddb %%zmm14,%%zmm14,%%zmm14\n\t"
				     "vpandq %%zmm0,%%zmm5,%%zmm5\n\t"
				     "vpandq %%zmm0,%%zmm7,%%zmm7\n\t"
				     "vpandq %%zmm0,%%zmm13,%%zmm13\n\t"
				     "vpandq %%zmm0,%%zmm15,%%zmm15\n\t"
				     "vpxorq %%zmm5,%%zmm4,%%zmm4\n\t"
				     "vpxorq %%zmm7,%%zmm6,%%zmm6\n\t"
				     "vpxorq %%zmm13,%%zmm12,%%zmm12\n\t"
				     "vpxorq %%zmm15,%%zmm14,%%zmm14"
				     :
				     : );
		}
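		/*
		 * With 256 bytes handled per iteration the P/Q updates
		 * cover whole cache lines, which is presumably why this
		 * variant can use vmovntdq even for the read-modify-write
		 * area, unlike the x1/x2 variants above.
		 */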
		asm volatile("vmovntdq %%zmm2,%0\n\t"
			     "vmovntdq %%zmm3,%1\n\t"
			     "vmovntdq %%zmm10,%2\n\t"
			     "vmovntdq %%zmm11,%3\n\t"
			     "vpxorq %4,%%zmm4,%%zmm4\n\t"
			     "vpxorq %5,%%zmm6,%%zmm6\n\t"
			     "vpxorq %6,%%zmm12,%%zmm12\n\t"
			     "vpxorq %7,%%zmm14,%%zmm14\n\t"
			     "vmovntdq %%zmm4,%4\n\t"
			     "vmovntdq %%zmm6,%5\n\t"
			     "vmovntdq %%zmm12,%6\n\t"
			     "vmovntdq %%zmm14,%7"
			     :
			     : "m" (p[d]), "m" (p[d+64]), "m" (p[d+128]),
			       "m" (p[d+192]), "m" (q[d]), "m" (q[d+64]),
			       "m" (q[d+128]), "m" (q[d+192]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx512x4 = {
	raid6_avx5124_gen_syndrome,
	raid6_avx5124_xor_syndrome,
	raid6_have_avx512,
	"avx512x4",
	1			/* Has cache hints */
};
#endif /* CONFIG_X86_64 */

#endif /* CONFIG_AS_AVX512 */