// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * raid6/sse2.c
 *
 * SSE-2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_sse_constants {
	u64 x1d[2];
} raid6_sse_constants  __attribute__((aligned(16))) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
};
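
/*
 * For reference: the 0x1d bytes above are the low byte of the RAID-6
 * field polynomial x^8 + x^4 + x^3 + x^2 + 1.  The pcmpgtb/paddb/
 * pand/pxor sequences in the loops below use this constant to do
 * sixteen parallel multiply-by-2 steps in GF(2^8).  One byte of that
 * step would look like the sketch below (illustrative only, not part
 * of the build):
 *
 *	static u8 gf2_mul2(u8 v)
 *	{
 *		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
 *	}
 */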

static int raid6_have_sse2(void)
{
	/* Not really boot_cpu but "all_cpus" */
	return boot_cpu_has(X86_FEATURE_MMX) &&
		boot_cpu_has(X86_FEATURE_FXSR) &&
		boot_cpu_has(X86_FEATURE_XMM) &&
		boot_cpu_has(X86_FEATURE_XMM2);
}

/*
 * Plain SSE2 implementation
 */
static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("movdqa %xmm2,%xmm4");	/* Q[0] */
		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
		for ( z = z0-2 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5"); /* Mask of Q bytes with top bit set */
			asm volatile("paddb %xmm4,%xmm4");   /* Q <<= 1, bytewise */
			asm volatile("pand %xmm0,%xmm5");    /* Mask &= 0x1d (field polynomial) */
			asm volatile("pxor %xmm5,%xmm4");    /* GF(2^8) multiply-by-2 done */
			asm volatile("pxor %xmm5,%xmm5");    /* Re-zero temp */
			asm volatile("pxor %xmm6,%xmm2");    /* P ^= data */
			asm volatile("pxor %xmm6,%xmm4");    /* Q ^= data */
			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
		}
		asm volatile("pcmpgtb %xmm4,%xmm5"); /* Final multiply-by-2 ... */
		asm volatile("paddb %xmm4,%xmm4");
		asm volatile("pand %xmm0,%xmm5");
		asm volatile("pxor %xmm5,%xmm4");
		asm volatile("pxor %xmm5,%xmm5");
		asm volatile("pxor %xmm6,%xmm2"); /* ... then fold in the last loaded page */
		asm volatile("pxor %xmm6,%xmm4");

		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
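
/*
 * What the loop above computes, as a hedged scalar sketch for one
 * byte position d (using the illustrative gf2_mul2() from the top of
 * the file): P is plain XOR parity, and Q is a Horner-scheme
 * evaluation over the data disks, highest disk first.
 *
 *	p_byte = q_byte = dptr[z0][d];
 *	for (z = z0 - 1; z >= 0; z--) {
 *		q_byte = gf2_mul2(q_byte) ^ dptr[z][d];
 *		p_byte ^= dptr[z][d];
 *	}
 *	p[d] = p_byte;
 *	q[d] = q_byte;
 */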


static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 16 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("pxor %xmm4,%xmm2");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm5,%xmm4");
		}
		/*
		 * P/Q left side optimization: disks below 'start' are
		 * unchanged, so only Q needs advancing, by one
		 * multiply-by-2 per remaining disk.
		 */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pxor %xmm5,%xmm4");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}
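
/*
 * Hedged scalar sketch of the xor_syndrome update above (again using
 * the illustrative gf2_mul2()): P and Q already hold a syndrome, and
 * only the disks in [start, stop] carry new data, so the left-side
 * loop merely multiplies Q through by 2 for each lower disk.
 *
 *	q_byte = dptr[stop][d];
 *	p_byte = p[d] ^ q_byte;
 *	for (z = stop - 1; z >= start; z--) {
 *		q_byte = gf2_mul2(q_byte) ^ dptr[z][d];
 *		p_byte ^= dptr[z][d];
 *	}
 *	for (z = start - 1; z >= 0; z--)
 *		q_byte = gf2_mul2(q_byte);
 *	p[d] = p_byte;
 *	q[d] ^= q_byte;
 */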

const struct raid6_calls raid6_sse2x1 = {
	raid6_sse21_gen_syndrome,
	raid6_sse21_xor_syndrome,
	raid6_have_sse2,
	"sse2x1",
	1			/* Has cache hints */
};
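
/*
 * The initializer above is positional.  With designated initializers
 * it would read as below; the field names are assumed from struct
 * raid6_calls in include/linux/raid/pq.h.  The raid6 core picks, from
 * the entries whose ->valid() returns true, the one whose
 * gen_syndrome() benchmarks fastest at boot; prefer is nonzero here
 * because these routines use non-temporal cache hints.
 *
 *	const struct raid6_calls raid6_sse2x1 = {
 *		.gen_syndrome	= raid6_sse21_gen_syndrome,
 *		.xor_syndrome	= raid6_sse21_xor_syndrome,
 *		.valid		= raid6_have_sse2,
 *		.name		= "sse2x1",
 *		.prefer		= 1,
 *	};
 */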

/*
 * Unrolled-by-2 SSE2 implementation
 */
static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 32 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
		}
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_sse2x2 = {
	raid6_sse22_gen_syndrome,
	raid6_sse22_xor_syndrome,
	raid6_have_sse2,
	"sse2x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64

/*
 * Unrolled-by-4 SSE2 implementation.  This is x86-64 only because it
 * needs the extra %xmm8-%xmm15 registers.
 */
static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
	asm volatile("pxor %xmm4,%xmm4");	/* Q[0] */
	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
	asm volatile("pxor %xmm6,%xmm6");	/* Q[1] */
	asm volatile("pxor %xmm7,%xmm7");	/* Zero temp */
	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
	asm volatile("pxor %xmm12,%xmm12");	/* Q[2] */
	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
	asm volatile("pxor %xmm14,%xmm14");	/* Q[3] */
	asm volatile("pxor %xmm15,%xmm15");	/* Zero temp */

	for ( d = 0 ; d < bytes ; d += 64 ) {
		for ( z = z0 ; z >= 0 ; z-- ) {
			/* The second prefetch seems to improve performance... */
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("pxor %xmm2,%xmm2");
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("pxor %xmm3,%xmm3");
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("pxor %xmm10,%xmm10");
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %xmm11,%xmm11");
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("pxor %xmm4,%xmm4");
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("pxor %xmm6,%xmm6");
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("pxor %xmm12,%xmm12");
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
		asm volatile("pxor %xmm14,%xmm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));

	for ( d = 0 ; d < bytes ; d += 64 ) {
		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
		asm volatile("pxor %xmm4,%xmm2");
		asm volatile("pxor %xmm6,%xmm3");
		asm volatile("pxor %xmm12,%xmm10");
		asm volatile("pxor %xmm14,%xmm11");
		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
			asm volatile("pxor %xmm5,%xmm2");
			asm volatile("pxor %xmm7,%xmm3");
			asm volatile("pxor %xmm13,%xmm10");
			asm volatile("pxor %xmm15,%xmm11");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
		/* P/Q left side optimization */
		for ( z = start-1 ; z >= 0 ; z-- ) {
			asm volatile("pxor %xmm5,%xmm5");
			asm volatile("pxor %xmm7,%xmm7");
			asm volatile("pxor %xmm13,%xmm13");
			asm volatile("pxor %xmm15,%xmm15");
			asm volatile("pcmpgtb %xmm4,%xmm5");
			asm volatile("pcmpgtb %xmm6,%xmm7");
			asm volatile("pcmpgtb %xmm12,%xmm13");
			asm volatile("pcmpgtb %xmm14,%xmm15");
			asm volatile("paddb %xmm4,%xmm4");
			asm volatile("paddb %xmm6,%xmm6");
			asm volatile("paddb %xmm12,%xmm12");
			asm volatile("paddb %xmm14,%xmm14");
			asm volatile("pand %xmm0,%xmm5");
			asm volatile("pand %xmm0,%xmm7");
			asm volatile("pand %xmm0,%xmm13");
			asm volatile("pand %xmm0,%xmm15");
			asm volatile("pxor %xmm5,%xmm4");
			asm volatile("pxor %xmm7,%xmm6");
			asm volatile("pxor %xmm13,%xmm12");
			asm volatile("pxor %xmm15,%xmm14");
		}
		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}


const struct raid6_calls raid6_sse2x4 = {
	raid6_sse24_gen_syndrome,
	raid6_sse24_xor_syndrome,
	raid6_have_sse2,
	"sse2x4",
	1			/* Has cache hints */
};

#endif /* CONFIG_X86_64 */