Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * arch/arm64/lib/xor-neon.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  * Authors: Jackie Liu <liuyun01@kylinos.cn>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * Copyright (C) 2018,Tianjin KYLIN Information Technology Co., Ltd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) #include <linux/raid/xor.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10) #include <linux/module.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) #include <asm/neon-intrinsics.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) 
/*
 * XOR the buffer at p2 into the buffer at p1 (p1 ^= p2) with NEON intrinsics.
 *
 * @bytes: byte count; divided by the size of four 128-bit vectors to get
 *         the number of passes.
 * @p1:    destination buffer, also the first XOR operand.
 * @p2:    second XOR operand, only read.
 *
 * Each pass covers eight 64-bit words per buffer. The pass counter is only
 * checked after a pass has completed, so one pass always runs.
 */
void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
	unsigned long *p2)
{
	uint64_t *dst = (uint64_t *)p1;
	uint64_t *src = (uint64_t *)p2;
	long remaining = bytes / (4 * sizeof(uint64x2_t));
	register uint64x2_t q0, q1, q2, q3;

	for (;;) {
		/* dst ^ src, one 128-bit vector (two words) at a time */
		q0 = veorq_u64(vld1q_u64(&dst[0]), vld1q_u64(&src[0]));
		q1 = veorq_u64(vld1q_u64(&dst[2]), vld1q_u64(&src[2]));
		q2 = veorq_u64(vld1q_u64(&dst[4]), vld1q_u64(&src[4]));
		q3 = veorq_u64(vld1q_u64(&dst[6]), vld1q_u64(&src[6]));

		/* write back only after all loads of this pass are done */
		vst1q_u64(&dst[0], q0);
		vst1q_u64(&dst[2], q1);
		vst1q_u64(&dst[4], q2);
		vst1q_u64(&dst[6], q3);

		dst += 8;
		src += 8;

		if (--remaining <= 0)
			break;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 
/*
 * XOR the buffers at p2 and p3 into the buffer at p1 (p1 ^= p2 ^ p3) with
 * NEON intrinsics.
 *
 * @bytes: byte count; divided by the size of four 128-bit vectors to get
 *         the number of passes.
 * @p1:    destination buffer, also the first XOR operand.
 * @p2:    second XOR operand, only read.
 * @p3:    third XOR operand, only read.
 *
 * Each pass covers eight 64-bit words per buffer. The pass counter is only
 * checked after a pass has completed, so one pass always runs.
 */
void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
	unsigned long *p2, unsigned long *p3)
{
	uint64_t *dst = (uint64_t *)p1;
	uint64_t *src2 = (uint64_t *)p2;
	uint64_t *src3 = (uint64_t *)p3;
	long remaining = bytes / (4 * sizeof(uint64x2_t));
	register uint64x2_t q0, q1, q2, q3;

	for (;;) {
		/* start with dst ^ src2 */
		q0 = veorq_u64(vld1q_u64(&dst[0]), vld1q_u64(&src2[0]));
		q1 = veorq_u64(vld1q_u64(&dst[2]), vld1q_u64(&src2[2]));
		q2 = veorq_u64(vld1q_u64(&dst[4]), vld1q_u64(&src2[4]));
		q3 = veorq_u64(vld1q_u64(&dst[6]), vld1q_u64(&src2[6]));

		/* fold in src3 */
		q0 = veorq_u64(q0, vld1q_u64(&src3[0]));
		q1 = veorq_u64(q1, vld1q_u64(&src3[2]));
		q2 = veorq_u64(q2, vld1q_u64(&src3[4]));
		q3 = veorq_u64(q3, vld1q_u64(&src3[6]));

		/* write back only after all loads of this pass are done */
		vst1q_u64(&dst[0], q0);
		vst1q_u64(&dst[2], q1);
		vst1q_u64(&dst[4], q2);
		vst1q_u64(&dst[6], q3);

		dst += 8;
		src2 += 8;
		src3 += 8;

		if (--remaining <= 0)
			break;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 
/*
 * XOR the buffers at p2, p3 and p4 into the buffer at p1
 * (p1 ^= p2 ^ p3 ^ p4) with NEON intrinsics.
 *
 * @bytes: byte count; divided by the size of four 128-bit vectors to get
 *         the number of passes.
 * @p1:    destination buffer, also the first XOR operand.
 * @p2:    second XOR operand, only read.
 * @p3:    third XOR operand, only read.
 * @p4:    fourth XOR operand, only read.
 *
 * Each pass covers eight 64-bit words per buffer. The pass counter is only
 * checked after a pass has completed, so one pass always runs.
 */
void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
	unsigned long *p2, unsigned long *p3, unsigned long *p4)
{
	uint64_t *dst = (uint64_t *)p1;
	uint64_t *src2 = (uint64_t *)p2;
	uint64_t *src3 = (uint64_t *)p3;
	uint64_t *src4 = (uint64_t *)p4;
	long remaining = bytes / (4 * sizeof(uint64x2_t));
	register uint64x2_t q0, q1, q2, q3;

	for (;;) {
		/* start with dst ^ src2 */
		q0 = veorq_u64(vld1q_u64(&dst[0]), vld1q_u64(&src2[0]));
		q1 = veorq_u64(vld1q_u64(&dst[2]), vld1q_u64(&src2[2]));
		q2 = veorq_u64(vld1q_u64(&dst[4]), vld1q_u64(&src2[4]));
		q3 = veorq_u64(vld1q_u64(&dst[6]), vld1q_u64(&src2[6]));

		/* fold in src3 */
		q0 = veorq_u64(q0, vld1q_u64(&src3[0]));
		q1 = veorq_u64(q1, vld1q_u64(&src3[2]));
		q2 = veorq_u64(q2, vld1q_u64(&src3[4]));
		q3 = veorq_u64(q3, vld1q_u64(&src3[6]));

		/* fold in src4 */
		q0 = veorq_u64(q0, vld1q_u64(&src4[0]));
		q1 = veorq_u64(q1, vld1q_u64(&src4[2]));
		q2 = veorq_u64(q2, vld1q_u64(&src4[4]));
		q3 = veorq_u64(q3, vld1q_u64(&src4[6]));

		/* write back only after all loads of this pass are done */
		vst1q_u64(&dst[0], q0);
		vst1q_u64(&dst[2], q1);
		vst1q_u64(&dst[4], q2);
		vst1q_u64(&dst[6], q3);

		dst += 8;
		src2 += 8;
		src3 += 8;
		src4 += 8;

		if (--remaining <= 0)
			break;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 
/*
 * XOR the buffers at p2, p3, p4 and p5 into the buffer at p1
 * (p1 ^= p2 ^ p3 ^ p4 ^ p5) with NEON intrinsics.
 *
 * @bytes: byte count; divided by the size of four 128-bit vectors to get
 *         the number of passes.
 * @p1:    destination buffer, also the first XOR operand.
 * @p2:    second XOR operand, only read.
 * @p3:    third XOR operand, only read.
 * @p4:    fourth XOR operand, only read.
 * @p5:    fifth XOR operand, only read.
 *
 * Each pass covers eight 64-bit words per buffer. The pass counter is only
 * checked after a pass has completed, so one pass always runs.
 */
void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
	unsigned long *p2, unsigned long *p3,
	unsigned long *p4, unsigned long *p5)
{
	uint64_t *dst = (uint64_t *)p1;
	uint64_t *src2 = (uint64_t *)p2;
	uint64_t *src3 = (uint64_t *)p3;
	uint64_t *src4 = (uint64_t *)p4;
	uint64_t *src5 = (uint64_t *)p5;
	long remaining = bytes / (4 * sizeof(uint64x2_t));
	register uint64x2_t q0, q1, q2, q3;

	for (;;) {
		/* start with dst ^ src2 */
		q0 = veorq_u64(vld1q_u64(&dst[0]), vld1q_u64(&src2[0]));
		q1 = veorq_u64(vld1q_u64(&dst[2]), vld1q_u64(&src2[2]));
		q2 = veorq_u64(vld1q_u64(&dst[4]), vld1q_u64(&src2[4]));
		q3 = veorq_u64(vld1q_u64(&dst[6]), vld1q_u64(&src2[6]));

		/* fold in src3 */
		q0 = veorq_u64(q0, vld1q_u64(&src3[0]));
		q1 = veorq_u64(q1, vld1q_u64(&src3[2]));
		q2 = veorq_u64(q2, vld1q_u64(&src3[4]));
		q3 = veorq_u64(q3, vld1q_u64(&src3[6]));

		/* fold in src4 */
		q0 = veorq_u64(q0, vld1q_u64(&src4[0]));
		q1 = veorq_u64(q1, vld1q_u64(&src4[2]));
		q2 = veorq_u64(q2, vld1q_u64(&src4[4]));
		q3 = veorq_u64(q3, vld1q_u64(&src4[6]));

		/* fold in src5 */
		q0 = veorq_u64(q0, vld1q_u64(&src5[0]));
		q1 = veorq_u64(q1, vld1q_u64(&src5[2]));
		q2 = veorq_u64(q2, vld1q_u64(&src5[4]));
		q3 = veorq_u64(q3, vld1q_u64(&src5[6]));

		/* write back only after all loads of this pass are done */
		vst1q_u64(&dst[0], q0);
		vst1q_u64(&dst[2], q1);
		vst1q_u64(&dst[4], q2);
		vst1q_u64(&dst[6], q3);

		dst += 8;
		src2 += 8;
		src3 += 8;
		src4 += 8;
		src5 += 8;

		if (--remaining <= 0)
			break;
	}
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) struct xor_block_template const xor_block_inner_neon = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 	.name	= "__inner_neon__",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 	.do_2	= xor_arm64_neon_2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 	.do_3	= xor_arm64_neon_3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 	.do_4	= xor_arm64_neon_4,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	.do_5	= xor_arm64_neon_5,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) EXPORT_SYMBOL(xor_block_inner_neon);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) MODULE_AUTHOR("Jackie Liu <liuyun01@kylinos.cn>");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) MODULE_DESCRIPTION("ARMv8 XOR Extensions");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) MODULE_LICENSE("GPL");