Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0-or-later
/* -*- linux-c -*- ------------------------------------------------------- *
 *
 *   Copyright (C) 2012 Intel Corporation
 *   Author: Yuanhan Liu <yuanhan.liu@linux.intel.com>
 *
 *   Based on sse2.c: Copyright 2002 H. Peter Anvin - All Rights Reserved
 *
 * ----------------------------------------------------------------------- */

/*
 * AVX2 implementation of RAID-6 syndrome functions
 *
 */

#include <linux/raid/pq.h>
#include "x86.h"

static const struct raid6_avx2_constants {
	u64 x1d[4];
} raid6_avx2_constants __aligned(32) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};
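
/*
 * Background on the 0x1d constant: RAID-6 computes the Q syndrome over
 * GF(2^8) with the generator polynomial x^8 + x^4 + x^3 + x^2 + 1
 * (0x11d).  Multiplying a byte by 2 in that field is a one-bit left
 * shift followed by an XOR with the polynomial's low byte (0x1d)
 * whenever the shifted-out high bit was set.  A scalar sketch of what
 * the SIMD loops below do 32 bytes at a time (illustrative only, this
 * helper is not part of the file):
 *
 *	static inline u8 gf_mul2(u8 v)
 *	{
 *		return (v << 1) ^ ((v & 0x80) ? 0x1d : 0);
 *	}
 *
 * Broadcasting 0x1d across all 32 lanes of a YMM register turns this
 * conditional reduction into a compare, an add and two bitwise ops.
 */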

static int raid6_have_avx2(void)
{
	return boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_AVX);
}

/*
 * Plain AVX2 implementation
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* Zero temp */

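	/*
	 * Horner evaluation of Q = sum_z 2^z * D_z over GF(2^8): walk
	 * from the highest data disk down, doubling the running Q before
	 * folding in each next disk.  P is the plain XOR of all data.
	 */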
	for (d = 0; d < bytes; d += 32) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
		asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
		asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
		for (z = z0-2; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			/*
			 * Q *= 2 in GF(2^8): vpcmpgtb against zero (ymm3)
			 * writes 0xff into lanes whose top bit is set,
			 * vpaddb shifts every byte left by one, and the
			 * masked 0x1d reduces the lanes that overflowed.
			 */
			asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			/*
			 * Fold disk z+1's data (ymm6) into P and Q, then
			 * load disk z for the next pass.
			 */
			asm volatile("vpxor %ymm6,%ymm2,%ymm2");
			asm volatile("vpxor %ymm6,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
		}
		/* One more doubling, then fold disk 0's data */
		asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
		asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
		asm volatile("vpand %ymm0,%ymm5,%ymm5");
		asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		asm volatile("vpxor %ymm6,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm4,%ymm4");

		/* Non-temporal stores: P/Q output bypasses the cache */
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

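/*
 * Fold an updated run of data disks [start, stop] into existing P/Q.
 * Starting the Horner walk at 'stop' skips the untouched higher disks
 * entirely ("right side"), while disks below 'start' contribute no new
 * data, so only the doublings that give Q its positional weight remain
 * ("left side").
 */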
static void raid6_avx21_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 32) {
		/* Seed Q with disk z0's data and fold it into the old P */
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		/*
		 * P/Q left side optimization: double Q once per remaining
		 * disk position, with no data to load.
		 */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
		}
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

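/*
 * struct raid6_calls entries: gen_syndrome, xor_syndrome, an
 * availability probe, the name shown by the boot-time benchmark, and a
 * flag marking routines that use non-temporal cache hints.
 * raid6_select_algo() times the usable entries and picks the fastest.
 */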
const struct raid6_calls raid6_avx2x1 = {
	raid6_avx21_gen_syndrome,
	raid6_avx21_xor_syndrome,
	raid6_have_avx2,
	"avx2x1",
	1			/* Has cache hints */
};

/*
 * Unrolled-by-2 AVX2 implementation
 */
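/*
 * Each iteration carries two independent 32-byte streams, so the serial
 * multiply-by-2 chains of the two halves can overlap in the pipeline.
 */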
static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1"); /* Zero temp */

	/* We uniformly assume a single prefetch covers at least 32 bytes */
	for (d = 0; d < bytes; d += 64) {
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
		asm volatile("prefetchnta %0" : : "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (dptr[z0][d+32]));/* P[1] */
		asm volatile("vmovdqa %ymm2,%ymm4"); /* Q[0] */
		asm volatile("vmovdqa %ymm3,%ymm6"); /* Q[1] */
		for (z = z0-1; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx22_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 64) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
		}
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		/* Don't use movntdq for r/w memory area < cache line */
		asm volatile("vmovdqa %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovdqa %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovdqa %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovdqa %%ymm3,%0" : "=m" (p[d+32]));
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x2 = {
	raid6_avx22_gen_syndrome,
	raid6_avx22_xor_syndrome,
	raid6_have_avx2,
	"avx2x2",
	1			/* Has cache hints */
};

#ifdef CONFIG_X86_64

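/*
 * ymm8-ymm15 are only architecturally available in 64-bit mode, hence
 * the CONFIG_X86_64 guard around the unrolled-by-4 variant.
 */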
/*
 * Unrolled-by-4 AVX2 implementation
 */
static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
	asm volatile("vpxor %ymm1,%ymm1,%ymm1");	/* Zero temp */
	asm volatile("vpxor %ymm2,%ymm2,%ymm2");	/* P[0] */
	asm volatile("vpxor %ymm3,%ymm3,%ymm3");	/* P[1] */
	asm volatile("vpxor %ymm4,%ymm4,%ymm4");	/* Q[0] */
	asm volatile("vpxor %ymm6,%ymm6,%ymm6");	/* Q[1] */
	asm volatile("vpxor %ymm10,%ymm10,%ymm10");	/* P[2] */
	asm volatile("vpxor %ymm11,%ymm11,%ymm11");	/* P[3] */
	asm volatile("vpxor %ymm12,%ymm12,%ymm12");	/* Q[2] */
	asm volatile("vpxor %ymm14,%ymm14,%ymm14");	/* Q[3] */

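	/*
	 * Unlike the x1/x2 variants, all eight P/Q accumulators start
	 * zeroed and are re-zeroed after each store, so the inner loop
	 * runs from the highest data disk down to 0 with no special
	 * first iteration.
	 */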
	for (d = 0; d < bytes; d += 128) {
		for (z = z0; z >= 0; z--) {
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+32]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+64]));
			asm volatile("prefetchnta %0" : : "m" (dptr[z][d+96]));
			asm volatile("vpcmpgtb %ymm4,%ymm1,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm1,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm1,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm1,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" : : "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7" : : "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13" : : "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15" : : "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		/*
		 * Store each 32-byte block non-temporally and re-zero its
		 * accumulator for the next 128-byte chunk.
		 */
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vpxor %ymm2,%ymm2,%ymm2");
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vpxor %ymm3,%ymm3,%ymm3");
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vpxor %ymm10,%ymm10,%ymm10");
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %ymm11,%ymm11,%ymm11");
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vpxor %ymm4,%ymm4,%ymm4");
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vpxor %ymm6,%ymm6,%ymm6");
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vpxor %ymm12,%ymm12,%ymm12");
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
		asm volatile("vpxor %ymm14,%ymm14,%ymm14");
	}

	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

static void raid6_avx24_xor_syndrome(int disks, int start, int stop,
				     size_t bytes, void **ptrs)
{
	u8 **dptr = (u8 **)ptrs;
	u8 *p, *q;
	int d, z, z0;

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	kernel_fpu_begin();

	asm volatile("vmovdqa %0,%%ymm0" :: "m" (raid6_avx2_constants.x1d[0]));

	for (d = 0 ; d < bytes ; d += 128) {
		asm volatile("vmovdqa %0,%%ymm4" :: "m" (dptr[z0][d]));
		asm volatile("vmovdqa %0,%%ymm6" :: "m" (dptr[z0][d+32]));
		asm volatile("vmovdqa %0,%%ymm12" :: "m" (dptr[z0][d+64]));
		asm volatile("vmovdqa %0,%%ymm14" :: "m" (dptr[z0][d+96]));
		asm volatile("vmovdqa %0,%%ymm2" : : "m" (p[d]));
		asm volatile("vmovdqa %0,%%ymm3" : : "m" (p[d+32]));
		asm volatile("vmovdqa %0,%%ymm10" : : "m" (p[d+64]));
		asm volatile("vmovdqa %0,%%ymm11" : : "m" (p[d+96]));
		asm volatile("vpxor %ymm4,%ymm2,%ymm2");
		asm volatile("vpxor %ymm6,%ymm3,%ymm3");
		asm volatile("vpxor %ymm12,%ymm10,%ymm10");
		asm volatile("vpxor %ymm14,%ymm11,%ymm11");
		/* P/Q data pages */
		for (z = z0-1 ; z >= start ; z--) {
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d]));
			asm volatile("prefetchnta %0" :: "m" (dptr[z][d+64]));
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
			asm volatile("vmovdqa %0,%%ymm5" :: "m" (dptr[z][d]));
			asm volatile("vmovdqa %0,%%ymm7"
				     :: "m" (dptr[z][d+32]));
			asm volatile("vmovdqa %0,%%ymm13"
				     :: "m" (dptr[z][d+64]));
			asm volatile("vmovdqa %0,%%ymm15"
				     :: "m" (dptr[z][d+96]));
			asm volatile("vpxor %ymm5,%ymm2,%ymm2");
			asm volatile("vpxor %ymm7,%ymm3,%ymm3");
			asm volatile("vpxor %ymm13,%ymm10,%ymm10");
			asm volatile("vpxor %ymm15,%ymm11,%ymm11");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		/* Prefetch old Q while the load-free doubling loop runs */
		asm volatile("prefetchnta %0" :: "m" (q[d]));
		asm volatile("prefetchnta %0" :: "m" (q[d+64]));
		/* P/Q left side optimization */
		for (z = start-1 ; z >= 0 ; z--) {
			asm volatile("vpxor %ymm5,%ymm5,%ymm5");
			asm volatile("vpxor %ymm7,%ymm7,%ymm7");
			asm volatile("vpxor %ymm13,%ymm13,%ymm13");
			asm volatile("vpxor %ymm15,%ymm15,%ymm15");
			asm volatile("vpcmpgtb %ymm4,%ymm5,%ymm5");
			asm volatile("vpcmpgtb %ymm6,%ymm7,%ymm7");
			asm volatile("vpcmpgtb %ymm12,%ymm13,%ymm13");
			asm volatile("vpcmpgtb %ymm14,%ymm15,%ymm15");
			asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
			asm volatile("vpaddb %ymm6,%ymm6,%ymm6");
			asm volatile("vpaddb %ymm12,%ymm12,%ymm12");
			asm volatile("vpaddb %ymm14,%ymm14,%ymm14");
			asm volatile("vpand %ymm0,%ymm5,%ymm5");
			asm volatile("vpand %ymm0,%ymm7,%ymm7");
			asm volatile("vpand %ymm0,%ymm13,%ymm13");
			asm volatile("vpand %ymm0,%ymm15,%ymm15");
			asm volatile("vpxor %ymm5,%ymm4,%ymm4");
			asm volatile("vpxor %ymm7,%ymm6,%ymm6");
			asm volatile("vpxor %ymm13,%ymm12,%ymm12");
			asm volatile("vpxor %ymm15,%ymm14,%ymm14");
		}
		asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
		asm volatile("vmovntdq %%ymm3,%0" : "=m" (p[d+32]));
		asm volatile("vmovntdq %%ymm10,%0" : "=m" (p[d+64]));
		asm volatile("vmovntdq %%ymm11,%0" : "=m" (p[d+96]));
		asm volatile("vpxor %0,%%ymm4,%%ymm4" : : "m" (q[d]));
		asm volatile("vpxor %0,%%ymm6,%%ymm6" : : "m" (q[d+32]));
		asm volatile("vpxor %0,%%ymm12,%%ymm12" : : "m" (q[d+64]));
		asm volatile("vpxor %0,%%ymm14,%%ymm14" : : "m" (q[d+96]));
		asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
		asm volatile("vmovntdq %%ymm6,%0" : "=m" (q[d+32]));
		asm volatile("vmovntdq %%ymm12,%0" : "=m" (q[d+64]));
		asm volatile("vmovntdq %%ymm14,%0" : "=m" (q[d+96]));
	}
	asm volatile("sfence" : : : "memory");
	kernel_fpu_end();
}

const struct raid6_calls raid6_avx2x4 = {
	raid6_avx24_gen_syndrome,
	raid6_avx24_xor_syndrome,
	raid6_have_avx2,
	"avx2x4",
	1			/* Has cache hints */
};
#endif