Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * NH - ε-almost-universal hash function, x86_64 SSE2 accelerated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  * Copyright 2018 Google LLC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  * Author: Eric Biggers <ebiggers@google.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) #define		PASS0_SUMS	%xmm0	// per-pass accumulators: two 64-bit partial sums each
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) #define		PASS1_SUMS	%xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) #define		PASS2_SUMS	%xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) #define		PASS3_SUMS	%xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) #define		K0		%xmm4	// K0-K3: four consecutive 16-byte key strides, used in rotation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) #define		K1		%xmm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) #define		K2		%xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) #define		K3		%xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) #define		T0		%xmm8	// T0: scratch, used only in the final summation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) #define		T1		%xmm9	// T1-T3: message stride, then message + key sums
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) #define		T2		%xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) #define		T3		%xmm11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) #define		T4		%xmm12	// T4-T7: shuffled copies of the sums, first multiplicand of each pass
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) #define		T5		%xmm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) #define		T6		%xmm14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) #define		T7		%xmm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) #define		KEY		%rdi	// 1st argument: key pointer (advanced as strides are consumed)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) #define		MESSAGE		%rsi	// 2nd argument: message pointer (advanced in the main loop)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) #define		MESSAGE_LEN	%rdx	// 3rd argument: message length in bytes (counted down)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) #define		HASH		%rcx	// 4th argument: output buffer for the four 64-bit sums
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) .macro _nh_stride	k0, k1, k2, k3, offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) 	// One 16-byte stride: for each of the 4 passes, add the message words to that pass's key stride, multiply word pairs, accumulate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 	// Load next message stride
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 	movdqu		\offset(MESSAGE), T1	// unaligned 16-byte load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 	// Load next key stride
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 	movdqu		\offset(KEY), \k3	// k0-k2 must already be loaded by the caller; only k3 is fetched here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 	// Add message words to key words
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 	movdqa		T1, T2	// copy of the message stride for pass 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 	movdqa		T1, T3	// copy of the message stride for pass 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 	paddd		T1, \k0    // reuse k0 to avoid a move; pass 0 sums land in k0, which is clobbered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 	paddd		\k1, T1	// pass 1: T1 = message + k1 (32-bit lanes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 	paddd		\k2, T2	// pass 2: T2 = message + k2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 	paddd		\k3, T3	// pass 3: T3 = message + k3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 	// Multiply 32x32 => 64 and accumulate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 	pshufd		$0x10, \k0, T4	// T4 dwords 0 and 2 = sum words 0 and 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 	pshufd		$0x32, \k0, \k0	// k0 dwords 0 and 2 = sum words 2 and 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) 	pshufd		$0x10, T1, T5	// same two shuffles for passes 1-3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 	pshufd		$0x32, T1, T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 	pshufd		$0x10, T2, T6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 	pshufd		$0x32, T2, T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 	pshufd		$0x10, T3, T7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 	pshufd		$0x32, T3, T3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	pmuludq		T4, \k0	// k0 = (word0 * word2, word1 * word3) as two unsigned 64-bit products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	pmuludq		T5, T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 	pmuludq		T6, T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 	pmuludq		T7, T3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	paddq		\k0, PASS0_SUMS	// add both products to the pass's two 64-bit accumulator lanes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	paddq		T1, PASS1_SUMS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	paddq		T2, PASS2_SUMS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 	paddq		T3, PASS3_SUMS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69)  * void nh_sse2(const u32 *key, const u8 *message, size_t message_len,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70)  *		u8 hash[NH_HASH_BYTES])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71)  * Writes 32 bytes (four 64-bit sums) to hash; clobbers %xmm0-%xmm15, KEY, MESSAGE, MESSAGE_LEN and flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72)  * It's guaranteed that message_len % 16 == 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) SYM_FUNC_START(nh_sse2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 	// Preload the first three key strides; each _nh_stride loads the fourth one it needs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	movdqu		0x00(KEY), K0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 	movdqu		0x10(KEY), K1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	movdqu		0x20(KEY), K2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 	add		$0x30, KEY	// KEY now runs three strides ahead of MESSAGE, so the same offset addresses both
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 	pxor		PASS0_SUMS, PASS0_SUMS	// zero all four accumulators
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 	pxor		PASS1_SUMS, PASS1_SUMS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	pxor		PASS2_SUMS, PASS2_SUMS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 	pxor		PASS3_SUMS, PASS3_SUMS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 	sub		$0x40, MESSAGE_LEN	// bias by 64 so the sign tells whether a full 4-stride group remains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 	jl		.Lloop4_done	// fewer than 64 bytes: skip the unrolled loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) .Lloop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 	_nh_stride	K0, K1, K2, K3, 0x00	// key registers rotate by one per stride; after four strides K0 is first again
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 	_nh_stride	K1, K2, K3, K0, 0x10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	_nh_stride	K2, K3, K0, K1, 0x20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	_nh_stride	K3, K0, K1, K2, 0x30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 	add		$0x40, KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 	add		$0x40, MESSAGE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 	sub		$0x40, MESSAGE_LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 	jge		.Lloop4	// repeat while at least 64 more bytes remain
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) .Lloop4_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	and		$0x3f, MESSAGE_LEN	// MESSAGE_LEN is negative here; its low 6 bits are the bytes left (0, 16, 32 or 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 	jz		.Ldone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 	_nh_stride	K0, K1, K2, K3, 0x00	// tail: up to three more strides, same rotation as the loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	sub		$0x10, MESSAGE_LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	jz		.Ldone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 	_nh_stride	K1, K2, K3, K0, 0x10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 	sub		$0x10, MESSAGE_LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 	jz		.Ldone
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	_nh_stride	K2, K3, K0, K1, 0x20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) .Ldone:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 	// Sum the accumulators for each pass, then store the sums to 'hash'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	movdqa		PASS0_SUMS, T0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	movdqa		PASS2_SUMS, T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	punpcklqdq	PASS1_SUMS, T0		// => (PASS0_SUM_A PASS1_SUM_A)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 	punpcklqdq	PASS3_SUMS, T1		// => (PASS2_SUM_A PASS3_SUM_A)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 	punpckhqdq	PASS1_SUMS, PASS0_SUMS	// => (PASS0_SUM_B PASS1_SUM_B)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 	punpckhqdq	PASS3_SUMS, PASS2_SUMS	// => (PASS2_SUM_B PASS3_SUM_B)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	paddq		PASS0_SUMS, T0	// T0 = (pass 0 total, pass 1 total)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	paddq		PASS2_SUMS, T1	// T1 = (pass 2 total, pass 3 total)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	movdqu		T0, 0x00(HASH)	// unaligned stores: hash is not required to be 16-byte aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	movdqu		T1, 0x10(HASH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) SYM_FUNC_END(nh_sse2)