Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /* -----------------------------------------------------------------------
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  *   neon.uc - RAID-6 syndrome calculation using ARM NEON instructions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *   Copyright (C) 2012 Rob Herring
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  *   Copyright (C) 2015 Linaro Ltd. <ard.biesheuvel@linaro.org>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  *   Based on altivec.uc:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  *     Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  *   This program is free software; you can redistribute it and/or modify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  *   it under the terms of the GNU General Public License as published by
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  *   Boston MA 02111-1307, USA; either version 2 of the License, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  *   (at your option) any later version; incorporated herein by reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17)  * ----------------------------------------------------------------------- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20)  * neon$#.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22)  * $#-way unrolled NEON intrinsics math RAID-6 instruction set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24)  * This file is postprocessed using unroll.awk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) #include <arm_neon.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 
/* Native vector word: one NEON q-register viewed as 16 unsigned byte lanes. */
typedef uint8x16_t unative_t;

/* Size in bytes of one native vector word. */
#define NSIZE	sizeof(unative_t)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 
/*
 * SHLBYTE() shifts every byte lane left by one bit.  Lanes are handled
 * independently: the bit shifted out of a lane is dropped, it does *not*
 * carry into the neighbouring byte.
 */
static inline unative_t SHLBYTE(unative_t v)
{
	const unative_t shifted = vshlq_n_u8(v, 1);

	return shifted;
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 
/*
 * MASK() spreads the top bit of every byte lane across the whole lane:
 * a lane whose high bit is 1 becomes 0xFF, a lane whose high bit is 0
 * becomes 0x00.  The lanes are reinterpreted as signed bytes so that the
 * right shift by 7 is arithmetic and replicates the sign bit.
 */
static inline unative_t MASK(unative_t v)
{
	const int8x16_t as_signed = vreinterpretq_s8_u8(v);
	const int8x16_t sign_fill = vshrq_n_s8(as_signed, 7);

	return vreinterpretq_u8_s8(sign_fill);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 
/*
 * PMUL() multiplies the corresponding byte lanes of v and u as
 * polynomials over GF(2) (carry-less multiply) using vmulq_p8(), which
 * yields one 8-bit result per lane.
 */
static inline unative_t PMUL(unative_t v, unative_t u)
{
	const poly8x16_t lhs = vreinterpretq_p8_u8(v);
	const poly8x16_t rhs = vreinterpretq_p8_u8(u);

	return vreinterpretq_u8_p8(vmulq_p8(lhs, rhs));
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 
/*
 * Generate the P (XOR parity) and Q (RS syndrome) blocks from scratch.
 *
 * disks: number of entries in ptrs
 * bytes: number of bytes to process in every block; the loop advances by
 *        NSIZE*$# bytes per iteration (presumably callers pass a multiple
 *        of that - confirm against the callers)
 * ptrs:  ptrs[0] .. ptrs[disks-3] are the data blocks (read only here),
 *        ptrs[disks-2] receives P and ptrs[disks-1] receives Q
 *
 * Q is accumulated Horner-style from the highest data disk downwards:
 * the running value is multiplied by 2 (shift each byte left, then XOR
 * 0x1d into every byte whose top bit was set) before the next data block
 * is XORed in.
 */
void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p, *q;
	unsigned long d;	/* byte offset; same type as 'bytes' */
	int z, z0;

	register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
	const unative_t x1d = vdupq_n_u8(0x1d);

	z0 = disks - 3;		/* Highest data disk */
	p = dptr[z0+1];		/* XOR parity */
	q = dptr[z0+2];		/* RS syndrome */

	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
		/* Seed both accumulators with the highest data disk */
		wq$$ = wp$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
		for ( z = z0-1 ; z >= 0 ; z-- ) {
			wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
			wp$$ = veorq_u8(wp$$, wd$$);
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);

			/* Q = (Q * 2) ^ data */
			w2$$ = vandq_u8(w2$$, x1d);
			w1$$ = veorq_u8(w1$$, w2$$);
			wq$$ = veorq_u8(w1$$, wd$$);
		}
		vst1q_u8(&p[d+NSIZE*$$], wp$$);
		vst1q_u8(&q[d+NSIZE*$$], wq$$);
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 
/*
 * Update existing P and Q blocks in place with the contribution of the
 * data blocks start..stop.
 *
 * disks: number of entries in ptrs
 * start: lowest data disk index whose block is XORed in
 * stop:  highest data disk index whose block is XORed in
 * bytes: number of bytes to process in every block; the loop advances by
 *        NSIZE*$# bytes per iteration (presumably callers pass a multiple
 *        of that - confirm against the callers)
 * ptrs:  block pointers; ptrs[disks-2] is P and ptrs[disks-1] is Q, both
 *        are read, XORed with the computed delta and written back
 *
 * Disks above 'stop' are never touched.  Disks below 'start' supply no
 * data, so for them only the remaining multiplications of the running Q
 * value by 2 are performed, four at a time where possible.
 */
void raid6_neon$#_xor_syndrome_real(int disks, int start, int stop,
				    unsigned long bytes, void **ptrs)
{
	uint8_t **dptr = (uint8_t **)ptrs;
	uint8_t *p, *q;
	unsigned long d;	/* byte offset; same type as 'bytes' */
	int z, z0;

	register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
	const unative_t x1d = vdupq_n_u8(0x1d);

	z0 = stop;		/* P/Q right side optimization */
	p = dptr[disks-2];	/* XOR parity */
	q = dptr[disks-1];	/* RS syndrome */

	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
		wq$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
		wp$$ = veorq_u8(vld1q_u8(&p[d+$$*NSIZE]), wq$$);

		/* P/Q data pages */
		for ( z = z0-1 ; z >= start ; z-- ) {
			wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
			wp$$ = veorq_u8(wp$$, wd$$);
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);

			w2$$ = vandq_u8(w2$$, x1d);
			w1$$ = veorq_u8(w1$$, w2$$);
			wq$$ = veorq_u8(w1$$, wd$$);
		}
		/*
		 * P/Q left side optimization: multiply by 2^4 per step.
		 * The four bits shifted out of each byte are folded back
		 * in by a polynomial multiply with 0x1d.
		 */
		for ( z = start-1 ; z >= 3 ; z -= 4 ) {
			w2$$ = vshrq_n_u8(wq$$, 4);
			w1$$ = vshlq_n_u8(wq$$, 4);

			w2$$ = PMUL(w2$$, x1d);
			wq$$ = veorq_u8(w1$$, w2$$);
		}

		/* z+1 multiplications by 2 are still outstanding (0..3) */
		switch (z) {
		case 2:
			w2$$ = vshrq_n_u8(wq$$, 5);
			w1$$ = vshlq_n_u8(wq$$, 3);

			w2$$ = PMUL(w2$$, x1d);
			wq$$ = veorq_u8(w1$$, w2$$);
			break;
		case 1:
			w2$$ = vshrq_n_u8(wq$$, 6);
			w1$$ = vshlq_n_u8(wq$$, 2);

			w2$$ = PMUL(w2$$, x1d);
			wq$$ = veorq_u8(w1$$, w2$$);
			break;
		case 0:
			w2$$ = MASK(wq$$);
			w1$$ = SHLBYTE(wq$$);

			w2$$ = vandq_u8(w2$$, x1d);
			wq$$ = veorq_u8(w1$$, w2$$);
			break;
		default:
			/* z == -1: nothing left to multiply */
			break;
		}
		/* Fold the computed delta into the stored Q */
		w1$$ = vld1q_u8(&q[d+NSIZE*$$]);
		wq$$ = veorq_u8(wq$$, w1$$);

		vst1q_u8(&p[d+NSIZE*$$], wp$$);
		vst1q_u8(&q[d+NSIZE*$$], wq$$);
	}
}