Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags   |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * Copyright (C) 2012 Intel Corporation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7) #include <arm_neon.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) #ifdef CONFIG_ARM
/*
 * Stand-in for the vqtbl1q_u8() intrinsic on AArch32, which does not offer
 * it natively because the underlying 128-bit table lookup instruction is not
 * implemented there. Only the 64-bit wide vtbl.8 form exists, so the lookup
 * is done in two halves:
 *
 *   a - 16-byte lookup table
 *   b - 16 byte-wide indices into that table
 *
 * Each 64-bit half of 'b' is looked up against the whole table, and the two
 * 64-bit results are joined back into one 128-bit vector.
 */
static uint8x16_t vqtbl1q_u8(uint8x16_t a, uint8x16_t b)
{
	/* View the 128-bit table as the register pair that vtbl2_u8() takes. */
	union {
		uint8x16_t	val;
		uint8x8x2_t	pair;
	} table = { .val = a };
	const uint8x8x2_t halves = table.pair;

	return vcombine_u8(vtbl2_u8(halves, vget_low_u8(b)),
			   vtbl2_u8(halves, vget_high_u8(b)));
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 
/*
 * NEON inner loop that updates the dp and dq buffers in place from p, q and
 * the two lookup tables, 16 bytes per iteration.
 *
 * It is the vector counterpart of this byte-at-a-time reference:
 *
 *	while (bytes--) {
 *		uint8_t px, qx, db;
 *
 *		px    = *p ^ *dp;
 *		qx    = qmul[*q ^ *dq];
 *		*dq++ = db = pbmul[px] ^ qx;
 *		*dp++ = db ^ px;
 *		p++; q++;
 *	}
 *
 * Here pbmul and qmul are each read as two 16-byte tables: the first is
 * indexed by the low nibble of a byte, the second (at offset 16) by its high
 * nibble, and the two entries are XORed to form the lookup result.
 *
 * The loop stops only when the remaining count is exactly zero, so 'bytes'
 * has to be a non-negative multiple of 16.
 */
void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp,
			      uint8_t *dq, const uint8_t *pbmul,
			      const uint8_t *qmul)
{
	const uint8x16_t low4 = vdupq_n_u8(0x0f);
	const uint8x16_t pb_lo = vld1q_u8(pbmul);
	const uint8x16_t pb_hi = vld1q_u8(pbmul + 16);
	const uint8x16_t q_lo = vld1q_u8(qmul);
	const uint8x16_t q_hi = vld1q_u8(qmul + 16);

	for (; bytes; bytes -= 16, p += 16, q += 16, dp += 16, dq += 16) {
		/* All four inputs are read before anything is stored back. */
		const uint8x16_t px = veorq_u8(vld1q_u8(p), vld1q_u8(dp));
		const uint8x16_t qd = veorq_u8(vld1q_u8(q), vld1q_u8(dq));

		/* qmul lookup of (q ^ dq): low-nibble entry ^ high-nibble entry */
		const uint8x16_t qx =
			veorq_u8(vqtbl1q_u8(q_lo, vandq_u8(qd, low4)),
				 vqtbl1q_u8(q_hi, vshrq_n_u8(qd, 4)));

		/* pbmul lookup of px, done the same way */
		const uint8x16_t pbx =
			veorq_u8(vqtbl1q_u8(pb_lo, vandq_u8(px, low4)),
				 vqtbl1q_u8(pb_hi, vshrq_n_u8(px, 4)));

		const uint8x16_t db = veorq_u8(pbx, qx);

		/* dq receives db first, then dp receives db ^ px. */
		vst1q_u8(dq, db);
		vst1q_u8(dp, veorq_u8(db, px));
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 
/*
 * NEON inner loop that updates the dq and p buffers in place from q and the
 * qmul lookup table, 16 bytes per iteration.
 *
 * It is the vector counterpart of this byte-at-a-time reference:
 *
 *	while (bytes--) {
 *		*p++ ^= *dq = qmul[*q ^ *dq];
 *		q++; dq++;
 *	}
 *
 * Here qmul is read as two 16-byte tables: the first is indexed by the low
 * nibble of a byte, the second (at offset 16) by its high nibble, and the
 * two entries are XORed to form the lookup result.
 *
 * The loop stops only when the remaining count is exactly zero, so 'bytes'
 * has to be a non-negative multiple of 16.
 */
void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq,
			      const uint8_t *qmul)
{
	const uint8x16_t low4 = vdupq_n_u8(0x0f);
	const uint8x16_t q_lo = vld1q_u8(qmul);
	const uint8x16_t q_hi = vld1q_u8(qmul + 16);

	for (; bytes; bytes -= 16, p += 16, q += 16, dq += 16) {
		const uint8x16_t qd = veorq_u8(vld1q_u8(q), vld1q_u8(dq));

		/* qmul lookup of (q ^ dq): low-nibble entry ^ high-nibble entry */
		const uint8x16_t d =
			veorq_u8(vqtbl1q_u8(q_lo, vandq_u8(qd, low4)),
				 vqtbl1q_u8(q_hi, vshrq_n_u8(qd, 4)));

		/* p is loaded before either store is issued. */
		const uint8x16_t new_p = veorq_u8(d, vld1q_u8(p));

		vst1q_u8(dq, d);
		vst1q_u8(p, new_p);
	}
}