// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

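/*
 * The GF(2^8) field used by RAID-6 is generated by the polynomial
 * x^8 + x^4 + x^3 + x^2 + 1 (0x11d); 0x1d is its low byte, the value
 * folded back in whenever a multiplication by x (i.e. by 2) shifts a
 * bit out of the top.  A full ymm register of 0x1d bytes serves as the
 * reduction constant for every loop below.
 */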
static const struct raid6_avx2_constants {
	u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

static int raid6_have_avx2(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}
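
/*
 * Every syndrome loop below multiplies the running Q accumulator by 2
 * in GF(2^8) with the same four-instruction sequence:
 *
 *	vpcmpgtb zero,acc  -> mask = 0xff in each byte whose top bit is
 *			      set (signed compare against zero)
 *	vpaddb   acc,acc   -> byte-wise shift left by one
 *	vpand    x1d,mask  -> mask holds 0x1d where a bit overflowed
 *	vpxor    mask,acc  -> fold the reduction polynomial back in
 *
 * A scalar sketch of the same operation (illustrative only, not part
 * of this file):
 *
 *	static inline u8 gf_mul2(u8 x)
 *	{
 *		return (x << 1) ^ ((x & 0x80) ? 0x1d : 0);
 *	}
 */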

/*
 * Plain AVX2 implementation
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */

	for (d = 0; d < bytes; d += 32) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
		}
		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
		asm volatile("vpand %ymm0,%ymm5,%ymm5");
		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm4,%ymm4");

		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
	}

	/* Make the non-temporal stores above globally visible */
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

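/*
 * xor_syndrome: fold the data blocks @start..@stop into existing P/Q
 * pages.  Disks above @stop are already covered by the old P/Q
 * contents ("right side" optimization); disks below @start contribute
 * no data, so the Q accumulator is only multiplied through by 2 to
 * shift it into the right polynomial position ("left side"
 * optimization).
 */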
static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 32) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		/*
		 * Don't use movntdq for r/w memory area < cache line:
		 * P/Q were just read, so their lines are already in
		 * cache and a non-temporal store would only evict them.
		 */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x1 = {
	raid6_avx21_gen_syndrome,
	raid6_avx21_xor_syndrome,
	raid6_have_avx2,
	"avx2x1",
	1			/* Has cache hints */
};
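
/*
 * Like the other raid6_calls tables in this file, raid6_avx2x1 is
 * registered with the raid6 core, which benchmarks each ->valid()
 * variant at init time and picks the fastest (see lib/raid6/algos.c).
 */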

/*
 * Unrolled-by-2 AVX2 implementation: two independent 32-byte streams
 * per iteration hide some of the latency of the multiply-by-2 chain.
 */
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
		asm volatile("vmovdqa %ymm2,%ymm4");	/* Q[0] */
		asm volatile("vmovdqa %ymm3,%ymm6");	/* Q[1] */
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x2 = {
	raid6_avx22_gen_syndrome,
	raid6_avx22_xor_syndrome,
	raid6_have_avx2,
	"avx2x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64
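/*
 * The 4-way unrolled variant needs ymm8-ymm15, which only exist in
 * 64-bit mode, hence the CONFIG_X86_64 guard.
 */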

/*
 * Unrolled-by-4 AVX2 implementation
 */
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */

	for (d = 0; d < bytes; d += 128) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
		asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
		asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		asm volatile("vpxor %ymm12,%ymm10,%ymm10");
		asm volatile("vpxor %ymm14,%ymm11,%ymm11");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13"
				     :: "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15"
				     :: "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+64]));
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
		asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x4 = {
	raid6_avx24_gen_syndrome,
	raid6_avx24_xor_syndrome,
	raid6_have_avx2,
	"avx2x4",
	1			/* Has cache hints */
};
#endif