Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 sources for the Orange Pi 5 / 5B / 5 Plus boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0-or-later
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /* -*- linux-c -*- ------------------------------------------------------- *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *   Copyright 2002 H. Peter Anvin - All Rights Reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * ----------------------------------------------------------------------- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  * raid6/sse2.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  * SSE-2 implementation of RAID-6 syndrome functions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) #include <linux/raid/pq.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) #include "x86.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) static const struct raid6_sse_constants {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) 	u64 x1d[2];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) } raid6_sse_constants  __attribute__((aligned(16))) = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) 	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL },
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) static int raid6_have_sse2(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 	/* Not really boot_cpu but "all_cpus" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 	return boot_cpu_has(X86_FEATURE_MMX) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 		boot_cpu_has(X86_FEATURE_FXSR) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) 		boot_cpu_has(X86_FEATURE_XMM) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) 		boot_cpu_has(X86_FEATURE_XMM2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34)  * Plain SSE2 implementation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 	u8 **dptr = (u8 **)ptrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 	u8 *p, *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) 	int d, z, z0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 	z0 = disks - 3;		/* Highest data disk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 	p = dptr[z0+1];		/* XOR parity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 	q = dptr[z0+2];		/* RS syndrome */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 	kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 	for ( d = 0 ; d < bytes ; d += 16 ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) 		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d])); /* P[0] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 		asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z0-1][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 		for ( z = z0-2 ; z >= 0 ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 			asm volatile("pxor %xmm6,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 			asm volatile("pxor %xmm6,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 			asm volatile("movdqa %0,%%xmm6" : : "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 		asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 		asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 		asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 		asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 		asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 		asm volatile("pxor %xmm6,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 		asm volatile("pxor %xmm6,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 		asm volatile("pxor %xmm2,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 		asm volatile("pxor %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	asm volatile("sfence" : : : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 				     size_t bytes, void **ptrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	u8 **dptr = (u8 **)ptrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	u8 *p, *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 	int d, z, z0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 	z0 = stop;		/* P/Q right side optimization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 	p = dptr[disks-2];	/* XOR parity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 	q = dptr[disks-1];	/* RS syndrome */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	for ( d = 0 ; d < bytes ; d += 16 ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 		asm volatile("pxor %xmm4,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 		/* P/Q data pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 		for ( z = z0-1 ; z >= start ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 			asm volatile("pxor %xmm5,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 		/* P/Q left side optimization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 		for ( z = start-1 ; z >= 0 ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 		/* Don't use movntdq for r/w memory area < cache line */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 	asm volatile("sfence" : : : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) const struct raid6_calls raid6_sse2x1 = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	raid6_sse21_gen_syndrome,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 	raid6_sse21_xor_syndrome,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	raid6_have_sse2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 	"sse2x1",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 	1			/* Has cache hints */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)  * Unrolled-by-2 SSE2 implementation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 	u8 **dptr = (u8 **)ptrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 	u8 *p, *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 	int d, z, z0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 	z0 = disks - 3;		/* Highest data disk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 	p = dptr[z0+1];		/* XOR parity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 	q = dptr[z0+2];		/* RS syndrome */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 	kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 	asm volatile("pxor %xmm5,%xmm5"); /* Zero temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 	asm volatile("pxor %xmm7,%xmm7"); /* Zero temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 	/* We uniformly assume a single prefetch covers at least 32 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 	for ( d = 0 ; d < bytes ; d += 32 ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 		asm volatile("movdqa %0,%%xmm2" : : "m" (dptr[z0][d]));    /* P[0] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 		asm volatile("movdqa %0,%%xmm3" : : "m" (dptr[z0][d+16])); /* P[1] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 		asm volatile("movdqa %xmm2,%xmm4"); /* Q[0] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 		asm volatile("movdqa %xmm3,%xmm6"); /* Q[1] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 		for ( z = z0-1 ; z >= 0 ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 			asm volatile("pcmpgtb %xmm6,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 			asm volatile("paddb %xmm6,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 			asm volatile("pand %xmm0,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 			asm volatile("movdqa %0,%%xmm5" : : "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 			asm volatile("movdqa %0,%%xmm7" : : "m" (dptr[z][d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 			asm volatile("pxor %xmm5,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 			asm volatile("pxor %xmm7,%xmm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 			asm volatile("pxor %xmm7,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 	asm volatile("sfence" : : : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 				     size_t bytes, void **ptrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 	u8 **dptr = (u8 **)ptrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 	u8 *p, *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 	int d, z, z0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) 	z0 = stop;		/* P/Q right side optimization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 	p = dptr[disks-2];	/* XOR parity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 	q = dptr[disks-1];	/* RS syndrome */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) 	kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	for ( d = 0 ; d < bytes ; d += 32 ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 		asm volatile("pxor %xmm4,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 		asm volatile("pxor %xmm6,%xmm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 		/* P/Q data pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 		for ( z = z0-1 ; z >= start ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 			asm volatile("pxor %xmm7,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 			asm volatile("pcmpgtb %xmm6,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 			asm volatile("paddb %xmm6,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 			asm volatile("pand %xmm0,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 			asm volatile("pxor %xmm5,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 			asm volatile("pxor %xmm7,%xmm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 		/* P/Q left side optimization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 		for ( z = start-1 ; z >= 0 ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 			asm volatile("pxor %xmm7,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 			asm volatile("pcmpgtb %xmm6,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 			asm volatile("paddb %xmm6,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 			asm volatile("pand %xmm0,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 		/* Don't use movntdq for r/w memory area < cache line */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 		asm volatile("movdqa %%xmm4,%0" : "=m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 		asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 		asm volatile("movdqa %%xmm2,%0" : "=m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 		asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 	asm volatile("sfence" : : : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) const struct raid6_calls raid6_sse2x2 = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 	raid6_sse22_gen_syndrome,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 	raid6_sse22_xor_syndrome,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 	raid6_have_sse2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 	"sse2x2",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 	1			/* Has cache hints */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) #ifdef CONFIG_X86_64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)  * Unrolled-by-4 SSE2 implementation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 	u8 **dptr = (u8 **)ptrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 	u8 *p, *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 	int d, z, z0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 	z0 = disks - 3;		/* Highest data disk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 	p = dptr[z0+1];		/* XOR parity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 	q = dptr[z0+2];		/* RS syndrome */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 	kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 	asm volatile("pxor %xmm2,%xmm2");	/* P[0] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 	asm volatile("pxor %xmm3,%xmm3");	/* P[1] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 	asm volatile("pxor %xmm4,%xmm4"); 	/* Q[0] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 	asm volatile("pxor %xmm5,%xmm5");	/* Zero temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 	asm volatile("pxor %xmm6,%xmm6"); 	/* Q[1] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 	asm volatile("pxor %xmm7,%xmm7"); 	/* Zero temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 	asm volatile("pxor %xmm10,%xmm10");	/* P[2] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 	asm volatile("pxor %xmm11,%xmm11");	/* P[3] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 	asm volatile("pxor %xmm12,%xmm12"); 	/* Q[2] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 	asm volatile("pxor %xmm13,%xmm13");	/* Zero temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) 	asm volatile("pxor %xmm14,%xmm14"); 	/* Q[3] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 	asm volatile("pxor %xmm15,%xmm15"); 	/* Zero temp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 	for ( d = 0 ; d < bytes ; d += 64 ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 		for ( z = z0 ; z >= 0 ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 			/* The second prefetch seems to improve performance... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 			asm volatile("pcmpgtb %xmm6,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) 			asm volatile("pcmpgtb %xmm12,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 			asm volatile("pcmpgtb %xmm14,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 			asm volatile("paddb %xmm6,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 			asm volatile("paddb %xmm12,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 			asm volatile("paddb %xmm14,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 			asm volatile("pand %xmm0,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 			asm volatile("pand %xmm0,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 			asm volatile("pand %xmm0,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 			asm volatile("pxor %xmm13,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 			asm volatile("pxor %xmm15,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) 			asm volatile("pxor %xmm5,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 			asm volatile("pxor %xmm7,%xmm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 			asm volatile("pxor %xmm13,%xmm10");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 			asm volatile("pxor %xmm15,%xmm11");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 			asm volatile("pxor %xmm13,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 			asm volatile("pxor %xmm15,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 			asm volatile("pxor %xmm7,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 			asm volatile("pxor %xmm13,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 			asm volatile("pxor %xmm15,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 		asm volatile("pxor %xmm2,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 		asm volatile("pxor %xmm3,%xmm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) 		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 		asm volatile("pxor %xmm10,%xmm10");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 		asm volatile("pxor %xmm11,%xmm11");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) 		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 		asm volatile("pxor %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) 		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 		asm volatile("pxor %xmm6,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) 		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) 		asm volatile("pxor %xmm12,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) 		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 		asm volatile("pxor %xmm14,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) 	asm volatile("sfence" : : : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 				     size_t bytes, void **ptrs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) 	u8 **dptr = (u8 **)ptrs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) 	u8 *p, *q;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 	int d, z, z0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) 	z0 = stop;		/* P/Q right side optimization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) 	p = dptr[disks-2];	/* XOR parity */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) 	q = dptr[disks-1];	/* RS syndrome */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) 	kernel_fpu_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) 	asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) 	for ( d = 0 ; d < bytes ; d += 64 ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) 		asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) 		asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) 		asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) 		asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) 		asm volatile("movdqa %0,%%xmm2" : : "m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) 		asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 		asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) 		asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) 		asm volatile("pxor %xmm4,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) 		asm volatile("pxor %xmm6,%xmm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 		asm volatile("pxor %xmm12,%xmm10");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) 		asm volatile("pxor %xmm14,%xmm11");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) 		/* P/Q data pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) 		for ( z = z0-1 ; z >= start ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) 			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) 			asm volatile("pxor %xmm7,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) 			asm volatile("pxor %xmm13,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 			asm volatile("pxor %xmm15,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 			asm volatile("pcmpgtb %xmm6,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 			asm volatile("pcmpgtb %xmm12,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) 			asm volatile("pcmpgtb %xmm14,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 			asm volatile("paddb %xmm6,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) 			asm volatile("paddb %xmm12,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) 			asm volatile("paddb %xmm14,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) 			asm volatile("pand %xmm0,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) 			asm volatile("pand %xmm0,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) 			asm volatile("pand %xmm0,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) 			asm volatile("pxor %xmm13,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 			asm volatile("pxor %xmm15,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) 			asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) 			asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 			asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) 			asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) 			asm volatile("pxor %xmm5,%xmm2");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 			asm volatile("pxor %xmm7,%xmm3");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) 			asm volatile("pxor %xmm13,%xmm10");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) 			asm volatile("pxor %xmm15,%xmm11");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 			asm volatile("pxor %xmm13,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) 			asm volatile("pxor %xmm15,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) 		asm volatile("prefetchnta %0" :: "m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) 		asm volatile("prefetchnta %0" :: "m" (q[d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) 		/* P/Q left side optimization */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 		for ( z = start-1 ; z >= 0 ; z-- ) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 			asm volatile("pxor %xmm5,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) 			asm volatile("pxor %xmm7,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 			asm volatile("pxor %xmm13,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) 			asm volatile("pxor %xmm15,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) 			asm volatile("pcmpgtb %xmm4,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) 			asm volatile("pcmpgtb %xmm6,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) 			asm volatile("pcmpgtb %xmm12,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) 			asm volatile("pcmpgtb %xmm14,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) 			asm volatile("paddb %xmm4,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) 			asm volatile("paddb %xmm6,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) 			asm volatile("paddb %xmm12,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) 			asm volatile("paddb %xmm14,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) 			asm volatile("pand %xmm0,%xmm5");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) 			asm volatile("pand %xmm0,%xmm7");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) 			asm volatile("pand %xmm0,%xmm13");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) 			asm volatile("pand %xmm0,%xmm15");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) 			asm volatile("pxor %xmm5,%xmm4");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) 			asm volatile("pxor %xmm7,%xmm6");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) 			asm volatile("pxor %xmm13,%xmm12");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) 			asm volatile("pxor %xmm15,%xmm14");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) 		asm volatile("movntdq %%xmm2,%0" : "=m" (p[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) 		asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) 		asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) 		asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) 		asm volatile("pxor %0,%%xmm4" : : "m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) 		asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) 		asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) 		asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) 		asm volatile("movntdq %%xmm4,%0" : "=m" (q[d]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) 		asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) 		asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) 		asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48]));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) 	asm volatile("sfence" : : : "memory");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) 	kernel_fpu_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) const struct raid6_calls raid6_sse2x4 = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) 	raid6_sse24_gen_syndrome,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) 	raid6_sse24_xor_syndrome,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) 	raid6_have_sse2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) 	"sse2x4",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) 	1			/* Has cache hints */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) #endif /* CONFIG_X86_64 */