Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * Fast SHA-256 implementation for SPE instruction set (PPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  * This code makes use of the SPE SIMD instruction set as defined in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  * Implementation is based on optimization guide notes from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) #include <asm/ppc_asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) #include <asm/asm-offsets.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) 
/*
 * Register aliases used by the macros and the transform routine below.
 *
 * r3 and r4 are used as pointers (hash state, input data). r5 is first
 * copied to CTR by the transform routine and is then reused as rH0.
 * r14-r25 (rW0-rW7, rT0, rT1, rKP, rT3) are saved by INITIALIZE and
 * restored by FINALIZE.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) #define rHP	r3	/* pointer to hash values in memory		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) #define rKP	r24	/* pointer to round constants			*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) #define rWP	r4	/* pointer to input data			*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) #define rH0	r5	/* 8 32 bit hash values in 8 registers		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) #define rH1	r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) #define rH2	r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) #define rH3	r8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) #define rH4	r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) #define rH5	r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) #define rH6	r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) #define rH7	r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) #define rW0	r14	/* 64 bit registers. 16 words in 8 registers	*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) #define rW1	r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) #define rW2	r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) #define rW3	r17
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) #define rW4	r18
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) #define rW5	r19
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) #define rW6	r20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) #define rW7	r21
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) #define rT0	r22	/* 64 bit temporaries 				*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) #define rT1	r23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) #define rT2	r0	/* 32 bit temporaries				*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) #define rT3	r25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 
/*
 * Loop-test hooks pasted into R_CALC_W through its "k" argument
 * (CMP_K##k##_LOOP). "N" expands to nothing. "C" compares rT1 with zero
 * (cmpwi, signed 32 bit) so that a conditional branch placed after the
 * macro can test the result.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) #define CMP_KN_LOOP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) #define CMP_KC_LOOP \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 	cmpwi		rT1,0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 
/*
 * Function prologue: allocate a 128 byte stack frame and save r14-r25 in it.
 * r14-r23 are stored with evstdw into 8 byte slots at offsets 8..80,
 * r24 and r25 with stw at offsets 88 and 92.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) #define INITIALIZE \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 	stwu		r1,-128(r1);	/* create stack frame		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 	evstdw		r14,8(r1);	/* We must save non volatile	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 	evstdw		r15,16(r1);	/* registers. Take the chance	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) 	evstdw		r16,24(r1);	/* and save the SPE part too	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) 	evstdw		r17,32(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 	evstdw		r18,40(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 	evstdw		r19,48(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 	evstdw		r20,56(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 	evstdw		r21,64(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 	evstdw		r22,72(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	evstdw		r23,80(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	stw		r24,88(r1);	/* save normal registers	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 	stw		r25,92(r1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 
/*
 * Function epilogue: reload r14-r25 from the frame built by INITIALIZE,
 * then store zero to the first word of each 8 byte slot at offsets 8..80
 * (the second word of each slot and the words at 88/92 are left as they
 * are) and release the 128 byte frame. r0 is clobbered (set to zero).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) #define FINALIZE \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	evldw		r14,8(r1);	/* restore SPE registers	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 	evldw		r15,16(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	evldw		r16,24(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 	evldw		r17,32(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 	evldw		r18,40(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 	evldw		r19,48(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 	evldw		r20,56(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	evldw		r21,64(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 	evldw		r22,72(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 	evldw		r23,80(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	lwz		r24,88(r1);	/* restore normal registers	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 	lwz		r25,92(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	xor		r0,r0,r0;	/* r0 = 0			*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 	stw		r0,8(r1);	/* Delete sensitive data	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	stw		r0,16(r1);	/* that we might have pushed	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 	stw		r0,24(r1);	/* from other context that runs	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 	stw		r0,32(r1);	/* the same code. Assume that	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 	stw		r0,40(r1);	/* the lower part of the GPRs	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	stw		r0,48(r1);	/* was already overwritten on	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 	stw		r0,56(r1);	/* the way down to here		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 	stw		r0,64(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 	stw		r0,72(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 	stw		r0,80(r1);					   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 	addi		r1,r1,128;	/* cleanup stack frame		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 
/*
 * LOAD_DATA(reg, off) loads one 32 bit input word through rWP.
 * NEXT_BLOCK is the per-block pointer adjustment that goes with it.
 *
 * __BIG_ENDIAN__: plain lwz at displacement "off"; rWP itself is only moved
 * by NEXT_BLOCK (+64).
 * Otherwise: byte-reversed lwbrx at rWP. This form ignores "off" and bumps
 * rWP by 4 after every word instead, so NEXT_BLOCK is empty.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) #ifdef __BIG_ENDIAN__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) #define LOAD_DATA(reg, off) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	lwz		reg,off(rWP);	/* load data			*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) #define NEXT_BLOCK \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 	addi		rWP,rWP,64;	/* increment per block		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) #define LOAD_DATA(reg, off) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 	lwbrx		reg,0,rWP; 	/* load data (byte reversed)	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 	addi		rWP,rWP,4;	/* increment per word		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) #define NEXT_BLOCK			/* nothing to do		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 
/*
 * R_LOAD_W(a, b, c, d, e, f, g, h, w, off): two interleaved SHA-256 rounds
 * that take their message words directly from the input (LOAD_DATA) and
 * their constants from off(rKP) and off+4(rKP).
 *
 * a..h name the working variables of the first round ("1:" comments). The
 * registers are not moved between rounds; the second round ("2:" comments)
 * reads them shifted by one position: h acts as a, a as b, b as c, c as d,
 * d as e, e as f, f as g and g as h. The "2:" comments keep the formula
 * names, not the macro parameter names.
 *
 * evmergelo copies the first loaded word into the upper half of w; the
 * second LOAD_DATA is then expected to replace only the lower half, leaving
 * both words in w. Clobbers rT0-rT3.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) #define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	LOAD_DATA(w, off)		/* 1: W				*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	rotrwi		rT0,e,6;	/* 1: S1 = e rotr 6		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 	rotrwi		rT1,e,11;	/* 1: S1' = e rotr 11		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 	rotrwi		rT2,e,25;	/* 1: S1" = e rotr 25		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 	xor		rT0,rT0,rT1;	/* 1: S1 = S1 xor S1'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 	and		rT3,e,f;	/* 1: ch = e and f		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	xor		rT0,rT0,rT2;	/* 1: S1 = S1 xor S1"		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 	andc		rT1,g,e;	/* 1: ch' = ~e and g		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 	lwz		rT2,off(rKP);	/* 1: K				*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 	xor		rT3,rT3,rT1;	/* 1: ch = ch xor ch'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	add		h,h,rT0;	/* 1: temp1 = h + S1		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	add		rT3,rT3,w;	/* 1: temp1' = ch + w		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	rotrwi		rT0,a,2;	/* 1: S0 = a rotr 2		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 	add		h,h,rT3;	/* 1: temp1 = temp1 + temp1'	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 	rotrwi		rT1,a,13;	/* 1: S0' = a rotr 13		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 	add		h,h,rT2;	/* 1: temp1 = temp1 + K		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	xor		rT0,rT0,rT1;	/* 1: S0 = S0 xor S0'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	add		d,d,h;		/* 1: d = d + temp1		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	xor		rT3,rT0,rT3;	/* 1: S0 = S0 xor S0"		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	evmergelo	w,w,w;		/*    shift W (low -> high)	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 	or		rT2,a,b;	/* 1: maj = a or b		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 	and		rT1,a,b;	/* 1: maj' = a and b		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	and		rT2,rT2,c;	/* 1: maj = maj and c		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 	LOAD_DATA(w, off+4)		/* 2: W				*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 	or		rT2,rT1,rT2;	/* 1: maj = maj or maj'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 	add		rT3,rT3,rT2;	/* 1: temp2 = S0 + maj		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 	add		h,h,rT3;	/* 1: h = temp1 + temp2		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 	and		rT3,d,e;	/* 2: ch = e and f		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 	lwz		rT2,off+4(rKP);	/* 2: K				*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 	add		g,g,rT0;	/* 2: temp1 = h + S1		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 	add		rT3,rT3,w;	/* 2: temp1' = ch + w		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 	add		g,g,rT3;	/* 2: temp1 = temp1 + temp1'	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 	add		g,g,rT2;	/* 2: temp1 = temp1 + K		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 	or		rT2,h,a;	/* 2: maj = a or b		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 	add		c,c,g;		/* 2: d = d + temp1		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 	add		g,g,rT3		/* 2: h = temp1 + temp2		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 
/*
 * R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off): two
 * interleaved SHA-256 rounds with the message schedule computed on the fly.
 *
 * Each w register holds two consecutive schedule words. w0 is updated in
 * place, per half, to w[-16] + s0(w[-15]) + s1(w[-2]) + w[-7]; w1, w4/w5
 * and w7 supply the w[-15], w[-7] and w[-2] operands. The constant pair is
 * loaded from off(rKP) and added to the new words; the two sums feed the
 * first ("1:") and second ("2:") round.
 *
 * a..h name the working variables of the first round. The second round
 * reads the same registers shifted by one position (h acts as a, a as b,
 * b as c, c as d, d as e, e as f, f as g, g as h); its comments keep the
 * formula names, not the macro parameter names.
 *
 * "k" selects CMP_KN_LOOP or CMP_KC_LOOP, expanded while rT1 still holds
 * the unmodified constant pair. Clobbers rT0-rT3.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) #define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 	rotrwi		rT2,e,6;	/* 1: S1 = e rotr 6		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	evmergelohi	rT0,w0,w1;	/*    w[-15]			*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 	rotrwi		rT3,e,11;	/* 1: S1' = e rotr 11		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 	evsrwiu		rT1,rT0,3;	/*    s0 = w[-15] >> 3		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 	evrlwi		rT0,rT0,25;	/*    s0' = w[-15] rotr	7	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 	rotrwi		rT3,e,25;	/* 1: S1" = e rotr 25		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 	evxor		rT1,rT1,rT0;	/*    s0 = s0 xor s0'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 	xor		rT2,rT2,rT3;	/* 1: S1 = S1 xor S1"		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	evrlwi		rT0,rT0,21;	/*    s0' = w[-15] rotr 18	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 	add		h,h,rT2;	/* 1: temp1 = h + S1		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 	evxor		rT0,rT0,rT1;	/*    s0 = s0 xor s0'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 	and		rT2,e,f;	/* 1: ch = e and f		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 	evaddw		w0,w0,rT0;	/*    w = w[-16] + s0		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 	andc		rT3,g,e;	/* 1: ch' = ~e and g		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 	evsrwiu		rT0,w7,10;	/*    s1 = w[-2] >> 10		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 	xor		rT2,rT2,rT3;	/* 1: ch = ch xor ch'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 	evrlwi		rT1,w7,15;	/*    s1' = w[-2] rotr 17	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	add		h,h,rT2;	/* 1: temp1 = temp1 + ch	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 	rotrwi		rT2,a,2;	/* 1: S0 = a rotr 2		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 	evrlwi		rT1,w7,13;	/*    s1' = w[-2] rotr 19	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 	rotrwi		rT3,a,13;	/* 1: S0' = a rotr 13		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 	evxor		rT0,rT0,rT1;	/*    s1 = s1 xor s1'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 	evldw		rT1,off(rKP);	/*    k				*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 	rotrwi		rT3,a,22;	/* 1: S0" = a rotr 22		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 	evaddw		w0,w0,rT0;	/*    w = w + s1		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 	xor		rT2,rT2,rT3;	/* 1: S0 = S0 xor S0"		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 	evmergelohi	rT0,w4,w5;	/*    w[-7]			*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) 	and		rT3,a,b;	/* 1: maj = a and b		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 	evaddw		w0,w0,rT0;	/*    w = w + w[-7]		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 	CMP_K##k##_LOOP							   \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	add		rT2,rT2,rT3;	/* 1: temp2 = S0 + maj		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 	evaddw		rT1,rT1,w0;	/*    wk = w + k		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 	xor		rT3,a,b;	/* 1: maj = a xor b		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 	evmergehi	rT0,rT1,rT1;	/*    upper wk -> low word	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 	and		rT3,rT3,c;	/* 1: maj = maj and c		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 	add		h,h,rT0;	/* 1: temp1 = temp1 + wk	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 	add		rT2,rT2,rT3;	/* 1: temp2 = temp2 + maj	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 	add		g,g,rT1;	/* 2: temp1 = h + wk		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 	add		d,d,h;		/* 1: d = d + temp1		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 	rotrwi		rT0,d,6;	/* 2: S1 = e rotr 6		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 	add		h,h,rT2;	/* 1: h = temp1 + temp2		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 	rotrwi		rT1,d,11;	/* 2: S1' = e rotr 11		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 	rotrwi		rT2,d,25;	/* 2: S1" = e rotr 25		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 	xor		rT0,rT0,rT1;	/* 2: S1 = S1 xor S1'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 	and		rT3,d,e;	/* 2: ch = e and f		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) 	xor		rT0,rT0,rT2;	/* 2: S1 = S1 xor S1"		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 	andc		rT1,f,d;	/* 2: ch' = ~e and g		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 	add		g,g,rT0;	/* 2: temp1 = temp1 + S1	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 	xor		rT3,rT3,rT1;	/* 2: ch = ch xor ch'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) 	rotrwi		rT0,h,2;	/* 2: S0 = a rotr 2		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 	add		g,g,rT3;	/* 2: temp1 = temp1 + ch	*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	rotrwi		rT1,h,13;	/* 2: S0' = a rotr 13		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 	rotrwi		rT3,h,22;	/* 2: S0" = a rotr 22		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	xor		rT0,rT0,rT1;	/* 2: S0 = S0 xor S0'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 	or		rT2,h,a;	/* 2: maj = a or b		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 	and		rT1,h,a;	/* 2: maj' = a and b		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 	and		rT2,rT2,b;	/* 2: maj = maj and c		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 	xor		rT3,rT0,rT3;	/* 2: S0 = S0 xor S0"		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 	or		rT2,rT1,rT2;	/* 2: maj = maj or maj'		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 	add		c,c,g;		/* 2: d = d + temp1		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 	add		rT3,rT3,rT2;	/* 2: temp2 = S0 + maj		*/ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	add		g,g,rT3		/* 2: h = temp1 + temp2		*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 
/*
 * ppc_spe_sha256_transform
 *
 * In:  r3 (rHP) = pointer to the eight 32 bit hash words, updated in place
 *      r4 (rWP) = pointer to the input data, 64 bytes consumed per block
 *      r5       = number of blocks, moved to CTR
 *
 * Per block: 16 rounds with W loaded from the input (8 x R_LOAD_W), then
 * groups of 16 rounds with computed W (8 x R_CALC_W) repeated until the
 * compare embedded in the last R_CALC_W of a group no longer yields "gt",
 * then the previous hash words are added to the working variables and the
 * sums are stored back through rHP.
 *
 * r14-r25 are preserved via INITIALIZE/FINALIZE.
 *
 * NOTE(review): the block count is only tested by the bdnz at the bottom of
 * the loop, so a count of zero is presumably never passed - confirm against
 * the callers.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) _GLOBAL(ppc_spe_sha256_transform)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 	INITIALIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 	mtctr		r5		/* CTR = block count; r5 is reused as rH0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) 	lwz		rH0,0(rHP)	/* load the current hash words */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 	lwz		rH1,4(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 	lwz		rH2,8(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	lwz		rH3,12(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 	lwz		rH4,16(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 	lwz		rH5,20(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 	lwz		rH6,24(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 	lwz		rH7,28(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) ppc_spe_sha256_main:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 	lis		rKP,PPC_SPE_SHA256_K@ha	/* rKP = start of K, per block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 	addi		rKP,rKP,PPC_SPE_SHA256_K@l
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 	R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 	R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) ppc_spe_sha256_16_rounds:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 	addi		rKP,rKP,64	/* advance to the next 16 constants */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 		 rW0, rW1, rW4, rW5, rW7, N, 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 		 rW1, rW2, rW5, rW6, rW0, N, 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 		 rW2, rW3, rW6, rW7, rW1, N, 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 		 rW3, rW4, rW7, rW0, rW2, N, 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 	R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 		 rW4, rW5, rW0, rW1, rW3, N, 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 	R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 		 rW5, rW6, rW1, rW2, rW4, N, 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 	R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 		 rW6, rW7, rW2, rW3, rW5, N, 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 	R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 		 rW7, rW0, rW3, rW4, rW6, C, 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 	bt		gt,ppc_spe_sha256_16_rounds	/* gt set by CMP_KC_LOOP above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 	lwz		rW0,0(rHP)	/* reload the previous hash words */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 	NEXT_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 	lwz		rW1,4(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 	lwz		rW2,8(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 	lwz		rW3,12(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 	lwz		rW4,16(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 	lwz		rW5,20(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) 	lwz		rW6,24(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 	lwz		rW7,28(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 	add		rH0,rH0,rW0	/* new hash = working vars + previous */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 	stw		rH0,0(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 	add		rH1,rH1,rW1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 	stw		rH1,4(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 	add		rH2,rH2,rW2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 	stw		rH2,8(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 	add		rH3,rH3,rW3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 	stw		rH3,12(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 	add		rH4,rH4,rW4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 	stw		rH4,16(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 	add		rH5,rH5,rW5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) 	stw		rH5,20(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 	add		rH6,rH6,rW6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 	stw		rH6,24(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 	add		rH7,rH7,rW7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 	stw		rH7,28(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 	bdnz		ppc_spe_sha256_main	/* next block while --CTR != 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 	FINALIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 	blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 
/*
 * SHA-256 round constants K[0..63], four per line, 16 per group of rounds.
 *
 * The values also steer the round loop in ppc_spe_sha256_transform: the
 * last R_CALC_W of each 16 round group compares the final constant of that
 * group with zero (cmpwi, signed) and the loop repeats while it is greater.
 * 0x14292967 and 0x106aa070 are positive and keep the loop going;
 * 0xc67178f2 is negative and ends it. The first 16 words are consumed by
 * R_LOAD_W and are not tested.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) .data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) PPC_SPE_SHA256_K:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 	.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) 	.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 	.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 	.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 	.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 	.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 	.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 	.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) 	.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 	.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 	.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 	.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 	.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 	.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 	.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 	.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2