Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) #!/usr/bin/env perl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) # SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4) # This code is taken from the OpenSSL project but the author (Andy Polyakov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5) # has relicensed it under the GPLv2. Therefore this program is free software;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6) # you can redistribute it and/or modify it under the terms of the GNU General
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7) # Public License version 2 as published by the Free Software Foundation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) # The original headers, including the original license headers, are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10) # included below for completeness.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) # ====================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) # project. The module is, however, dual licensed under OpenSSL and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) # CRYPTOGAMS licenses depending on where you obtain it. For further
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) # details see https://www.openssl.org/~appro/cryptogams/.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) # ====================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) # SHA256 block procedure for ARMv4. May 2007.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) # Performance is ~2x better than gcc 3.4 generated code and in "abso-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) # byte [on single-issue Xscale PXA250 core].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) # July 2010.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) # Rescheduling for dual-issue pipeline resulted in 22% improvement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) # Cortex A8 core and ~20 cycles per processed byte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) # February 2011.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) # Profiler-assisted and platform-specific optimization resulted in 16%
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) # improvement on Cortex A8 core and ~15.4 cycles per processed byte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) # September 2013.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) # Add NEON implementation. On Cortex A8 it was measured to process one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) # byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) # S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) # code (meaning that latter performs sub-optimally, nothing was done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) # about it).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) # May 2014.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) # Add ARMv8 code path performing at 2.0 cpb on Apple A7.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 
# Scan the command line for the first argument that looks like an output
# file name ("name.ext"); arguments that do not match are discarded.  The
# loop also stops when the argument list is exhausted or a false value
# (such as "0") is shifted off.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when an argument was actually selected.  With no
# argument the generated assembly keeps going to the inherited STDOUT (for
# example when the caller redirects it from the shell), as it did before
# when the unchecked open simply failed.  Three-argument open prevents the
# name from being interpreted as part of the mode, and the result is now
# checked so an unwritable path is reported instead of silently ignored.
if (defined($output) && $output ne "") {
	open(STDOUT, '>', $output) or die "can't open $output: $!";
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 
# Register allocation for the integer code path.  Several Perl names map to
# the same ARM register: the argument registers r0-r3 ($ctx, $inp, $len,
# $T1) double as the temporaries $t0, $t4, $t1 and $t3.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) $ctx="r0";	$t0="r0";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) $inp="r1";	$t4="r1";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) $len="r2";	$t1="r2";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) $T1="r3";	$t3="r3";
# The eight working variables a..h live in r4-r11.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) $A="r4";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) $B="r5";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) $C="r6";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) $D="r7";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) $E="r8";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) $F="r9";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) $G="r10";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) $H="r11";
# @V is rotated by the round loops so that each round sees the registers
# under shifted roles instead of moving data between registers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) @V=($A,$B,$C,$D,$E,$F,$G,$H);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) $t2="r12";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) $Ktbl="r14";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 
# Rotate/shift amounts.  All three @Sigma0/@Sigma1 entries are used as
# rotate-right counts; for @sigma0/@sigma1 the first two entries are used
# as rotates and the third as a logical shift right (see BODY_16_XX).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) @Sigma0=( 2,13,22);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) @Sigma1=( 6,11,25);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) @sigma0=( 7,18, 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) @sigma1=(17,19,10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 
# BODY_00_15($i, $a..$h): append the assembly for one round to $code.
#
#   $i       round index; it is interpolated into the emitted text, including
#            the "#if $i==31" / "#if $i<15" preprocessor lines.
#   $a..$h   names of the registers holding the eight working variables in
#            the roles they have for this round.
#
# Adding Maj(a,b,c) to h is deferred by one round: this round leaves the
# value in $t3 and the next round adds it as $t2 ("from the past"), which is
# why the globals $t2 and $t3 are swapped before returning.
# NOTE(review): the `...` expressions inside the heredocs are not evaluated
# here; presumably a later pass outside this chunk expands them - confirm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) sub BODY_00_15 {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 
# Rounds 0..15 only: finish fetching the input word into $t1 (byte-swapped
# with rev on ARMv7+ unless __ARMEB__ is defined, assembled from byte loads
# otherwise) and start Sigma1(e).  For later rounds BODY_16_XX has already
# emitted the equivalent work before calling this sub.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) $code.=<<___ if ($i<16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) #if __ARM_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	@ ldr	$t1,[$inp],#4			@ $i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) # if $i==15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	str	$inp,[sp,#17*4]			@ make room for $t4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) # ifndef __ARMEB__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 	rev	$t1,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 	@ ldrb	$t1,[$inp,#3]			@ $i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 	ldrb	$t2,[$inp,#2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	ldrb	$t0,[$inp,#1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	orr	$t1,$t1,$t2,lsl#8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 	ldrb	$t2,[$inp],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 	orr	$t1,$t1,$t0,lsl#16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) # if $i==15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 	str	$inp,[sp,#17*4]			@ make room for $t4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	orr	$t1,$t1,$t2,lsl#24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) ___
# Common part of every round: accumulate X[i], K256[i], Sigma1(e), Ch(e,f,g)
# and Sigma0(a) into h, store X[i] to the stack, add h into d, and prefetch
# the word(s) the following round will need.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	ldr	$t2,[$Ktbl],#4			@ *K256++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 	add	$h,$h,$t1			@ h+=X[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 	str	$t1,[sp,#`$i%16`*4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 	eor	$t1,$f,$g
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 	add	$h,$h,$t0,ror#$Sigma1[0]	@ h+=Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	and	$t1,$t1,$e
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 	add	$h,$h,$t2			@ h+=K256[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 	eor	$t1,$t1,$g			@ Ch(e,f,g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 	eor	$t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	add	$h,$h,$t1			@ h+=Ch(e,f,g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) #if $i==31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	and	$t2,$t2,#0xff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 	cmp	$t2,#0xf2			@ done?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) #if $i<15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) # if __ARM_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 	ldr	$t1,[$inp],#4			@ prefetch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	ldrb	$t1,[$inp,#3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 	eor	$t2,$a,$b			@ a^b, b^c in next round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	ldr	$t1,[sp,#`($i+2)%16`*4]		@ from future BODY_16_xx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 	eor	$t2,$a,$b			@ a^b, b^c in next round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 	ldr	$t4,[sp,#`($i+15)%16`*4]	@ from future BODY_16_xx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 	eor	$t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]`	@ Sigma0(a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 	and	$t3,$t3,$t2			@ (b^c)&=(a^b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 	add	$d,$d,$h			@ d+=h
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 	eor	$t3,$t3,$b			@ Maj(a,b,c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 	add	$h,$h,$t0,ror#$Sigma0[0]	@ h+=Sigma0(a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 	@ add	$h,$h,$t3			@ h+=Maj(a,b,c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ___
# Swap the global register names so that the Maj value just left in $t3 is
# what the next round adds as $t2, and this round's a^b becomes its b^c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	($t2,$t3)=($t3,$t2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 
# BODY_16_XX($i, $a..$h): append one round with message-schedule expansion
# to $code.  Takes the same arguments as BODY_00_15.
#
# The emitted code expects $t1 and $t4 to already hold X[i+1] and X[i+14]
# (the two loads are shown only as assembly comments here; the previous
# round emitted them).  It computes sigma0(X[i+1]) + sigma1(X[i+14]), adds
# the words read from stack slots (i+0)%16 and (i+9)%16 to form the new
# X[i] in $t1, starts Sigma1(e), and then delegates the rest of the round
# to BODY_00_15 with the unchanged argument list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) sub BODY_16_XX {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 	@ ldr	$t1,[sp,#`($i+1)%16`*4]		@ $i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 	@ ldr	$t4,[sp,#`($i+14)%16`*4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 	mov	$t0,$t1,ror#$sigma0[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	add	$a,$a,$t2			@ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 	mov	$t2,$t4,ror#$sigma1[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 	eor	$t0,$t0,$t1,ror#$sigma0[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 	eor	$t2,$t2,$t4,ror#$sigma1[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 	eor	$t0,$t0,$t1,lsr#$sigma0[2]	@ sigma0(X[i+1])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 	ldr	$t1,[sp,#`($i+0)%16`*4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 	eor	$t2,$t2,$t4,lsr#$sigma1[2]	@ sigma1(X[i+14])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 	ldr	$t4,[sp,#`($i+9)%16`*4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 	add	$t2,$t2,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 	eor	$t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`	@ from BODY_00_15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 	add	$t1,$t1,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	eor	$t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]`	@ Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 	add	$t1,$t1,$t4			@ X[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) ___
# Since $i>=16 here, BODY_00_15 skips its input-loading heredoc and emits
# only the common part of the round.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 	&BODY_00_15(@_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 
# Start the generated file ("=" rather than ".=" initialises $code):
# preprocessor glue, instruction-set selection, the K256 constant table and
# the prologue of sha256_block_data_order up to the first input-word load.
# The prologue ends by clearing $t2 (eor $t2,$t2,$t2) so that the deferred
# "h+=Maj(a,b,c) from the past" addition of the very first round adds zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) $code=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) #ifndef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) # include "arm_arch.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) # define __ARM_ARCH__ __LINUX_ARM_ARCH__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) # define __ARM_MAX_ARCH__ 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) #if __ARM_ARCH__<7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) .code	32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) .syntax unified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) # ifdef __thumb2__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) .thumb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) .code   32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) .type	K256,%object
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) .align	5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) K256:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) .word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) .word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) .word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) .word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) .word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) .word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) .word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) .word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) .word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) .word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) .word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) .word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) .word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) .word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) .word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) .word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) .size	K256,.-K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) .word	0				@ terminator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) .LOPENSSL_armcap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) .word	OPENSSL_armcap_P-sha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) .align	5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) .global	sha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) .type	sha256_block_data_order,%function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) sha256_block_data_order:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) .Lsha256_block_data_order:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) #if __ARM_ARCH__<7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 	sub	r3,pc,#8		@ sha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 	adr	r3,.Lsha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	ldr	r12,.LOPENSSL_armcap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 	tst	r12,#ARMV8_SHA256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 	bne	.LARMv8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 	tst	r12,#ARMV7_NEON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 	bne	.LNEON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 	stmdb	sp!,{$ctx,$inp,$len,r4-r11,lr}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	ldmia	$ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 	sub	$Ktbl,r3,#256+32	@ K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 	sub	sp,sp,#16*4		@ alloca(X[16])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) .Loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) # if __ARM_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 	ldr	$t1,[$inp],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 	ldrb	$t1,[$inp,#3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 	eor	$t3,$B,$C		@ magic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 	eor	$t2,$t2,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) ___
# Rounds 0..15 are emitted fully unrolled.  After each round @V is rotated
# by one position so the next round receives the registers in shifted roles.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) for($i=0;$i<16;$i++)	{ &BODY_00_15($i,@V); unshift(@V,pop(@V)); }
# Rounds 16..31 are emitted once under .Lrounds_16_xx.  The generated code
# branches back to this label (the "bne" in the epilogue below) until the
# check emitted for $i==31 finds 0xf2 in the low byte of the K256 word just
# loaded - the low byte of the last table entry, 0xc67178f2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) $code.=".Lrounds_16_xx:\n";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) for (;$i<32;$i++)	{ &BODY_16_XX($i,@V); unshift(@V,pop(@V)); }
# Epilogue: add the working variables back into the context, rewind the
# K256 pointer, loop to .Loop while input remains, then tear down the stack
# frame and return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) #if __ARM_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	ite	eq			@ Thumb2 thing, sanity check in ARM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 	ldreq	$t3,[sp,#16*4]		@ pull ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 	bne	.Lrounds_16_xx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	add	$A,$A,$t2		@ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 	ldr	$t0,[$t3,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 	ldr	$t1,[$t3,#4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 	ldr	$t2,[$t3,#8]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) 	add	$A,$A,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 	ldr	$t0,[$t3,#12]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 	add	$B,$B,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 	ldr	$t1,[$t3,#16]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 	add	$C,$C,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 	ldr	$t2,[$t3,#20]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 	add	$D,$D,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 	ldr	$t0,[$t3,#24]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 	add	$E,$E,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 	ldr	$t1,[$t3,#28]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 	add	$F,$F,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 	ldr	$inp,[sp,#17*4]		@ pull inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 	ldr	$t2,[sp,#18*4]		@ pull inp+len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 	add	$G,$G,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 	add	$H,$H,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 	stmia	$t3,{$A,$B,$C,$D,$E,$F,$G,$H}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 	cmp	$inp,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 	sub	$Ktbl,$Ktbl,#256	@ rewind Ktbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 	bne	.Loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 	add	sp,sp,#`16+3`*4	@ destroy frame
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) #if __ARM_ARCH__>=5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 	ldmia	sp!,{r4-r11,pc}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 	ldmia	sp!,{r4-r11,lr}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 	tst	lr,#1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 	moveq	pc,lr			@ be binary compatible with V4, yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 	bx	lr			@ interoperable with Thumb ISA:-)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) .size	sha256_block_data_order,.-sha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) ######################################################################
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) # NEON stuff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) {{{
# Lexicals private to the NEON section.
# @X: the four quad registers q0..q3 that Xupdate/Xpreload rotate through.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) my @X=map("q$_",(0..3));
# NEON temporaries.  This lexical $T1 ("q9") shadows the global $T1 ("r3")
# for the remainder of the enclosing scope.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) my ($T0,$T1,$T2,$T3,$T4,$T5)=("q8","q9","q10","q11","d24","d25");
# $Xfer reuses the register named by $t4 as the pointer operand of the
# vst1.32 stores emitted by Xupdate/Xpreload.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) my $Xfer=$t4;
# Round counter advanced by the last snippet of body_00_15.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) my $j=0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 
# Dlo(QREG): translate a NEON quad register name "qN" into the name of its
# low doubleword half, "d(2*N)".  Returns the empty string when the
# argument contains no "qN".  The empty prototype is retained from the
# original definition; callers in this file use the &-form, which bypasses
# prototype checking.
sub Dlo()
{
    my $qreg = shift;
    return "" unless $qreg =~ m|q([1]?[0-9])|;
    return "d" . ($1 * 2);
}

# Dhi(QREG): like Dlo, but yields the high doubleword half, "d(2*N+1)".
sub Dhi()
{
    my $qreg = shift;
    return "" unless $qreg =~ m|q([1]?[0-9])|;
    return "d" . ($1 * 2 + 1);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 
# Catch-all for calls to undefined subs: the sub name is taken as an
# instruction mnemonic and one line of assembly is appended to $code.
# The package qualifier is stripped and the first "_" becomes "." (so
# vshr_u32 is emitted as vshr.u32).  Operands are joined with commas; the
# last operand is prefixed with "#" when it is a plain number, turning it
# into an immediate.
sub AUTOLOAD()
{
    (my $mnemonic = $AUTOLOAD) =~ s/.*:://;
    $mnemonic =~ s/_/\./;

    my $last_operand = pop;
    # A value that survives a numeric round trip unchanged is a number.
    $last_operand = "#$last_operand" if ($last_operand*1 eq $last_operand);

    $code .= "\t$mnemonic\t" . join(',', @_, $last_operand) . "\n";
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 
# Xupdate(BODY): emit the NEON message-schedule update for the four words
# held in $X[0], interleaved with the scalar instructions of four rounds.
#
#   BODY   code reference returning a list of Perl source snippets (see
#          body_00_15).  It is called four times; the snippets are then
#          eval'ed one at a time between the NEON instructions so that the
#          scalar and NEON instruction streams are interleaved in $code.
#
# The NEON mnemonics (&vext_8, &vshr_u32, ...) have no definition in the
# visible code; presumably they all resolve through AUTOLOAD - confirm.
# Lines indented one extra column compute sigma1 on doubleword halves.
# The statement order fixes the instruction order of the output, so it must
# not be rearranged.  On return @X has been rotated by one register.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) sub Xupdate()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) { use integer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308)   my $body = shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309)   my @insns = (&$body,&$body,&$body,&$body);
# Lexicals that the eval'ed snippets assign from @V and then reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)   my ($a,$b,$c,$d,$e,$f,$g,$h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 	&vext_8		($T0,@X[0],@X[1],4);	# X[1..4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 	&vext_8		($T1,@X[2],@X[3],4);	# X[9..12]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 	&vshr_u32	($T2,$T0,$sigma0[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += X[9..12]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 	&vshr_u32	($T1,$T0,$sigma0[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) 	&vsli_32	($T2,$T0,32-$sigma0[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 	&vshr_u32	($T3,$T0,$sigma0[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 	&veor		($T1,$T1,$T2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 	&vsli_32	($T3,$T0,32-$sigma0[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 	&veor		($T1,$T1,$T3);		# sigma0(X[1..4])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) 	  &vshr_u32	($T5,&Dhi(@X[3]),$sigma1[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 	&vadd_i32	(@X[0],@X[0],$T1);	# X[0..3] += sigma0(X[1..4])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) 	  &veor		($T5,$T5,$T4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 	  &vshr_u32	($T4,&Dhi(@X[3]),$sigma1[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) 	  &vsli_32	($T4,&Dhi(@X[3]),32-$sigma1[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 	  &veor		($T5,$T5,$T4);		# sigma1(X[14..15])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) 	&vadd_i32	(&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) 	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) 	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) 	  &vshr_u32	($T5,&Dlo(@X[0]),$sigma1[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) 	  &veor		($T5,$T5,$T4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) 	  &vshr_u32	($T4,&Dlo(@X[0]),$sigma1[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 	&vld1_32	("{$T0}","[$Ktbl,:128]!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) 	  &vsli_32	($T4,&Dlo(@X[0]),32-$sigma1[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) 	  &veor		($T5,$T5,$T4);		# sigma1(X[16..17])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 	&vadd_i32	(&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) 	&vadd_i32	($T0,$T0,@X[0]);
# Drain the queue until exactly two snippets remain; those two are emitted
# after the store below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 	 while($#insns>=2) { eval(shift(@insns)); }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) 	&vst1_32	("{$T0}","[$Xfer,:128]!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) 	push(@X,shift(@X));		# "rotate" X[]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) 
# Xpreload(BODY): variant of Xupdate that performs no schedule expansion.
# Interleaved with the scalar snippets of four rounds it emits a load of
# four words through $Ktbl into $T0, a vrev32.8 byte swap of $X[0], the
# addition $T0 += $X[0], and a store of $T0 through $Xfer.
#
#   BODY   code reference returning a list of Perl source snippets; called
#          four times, and the snippets are eval'ed in order.
#
# On return @X has been rotated by one register.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) sub Xpreload()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) { use integer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409)   my $body = shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410)   my @insns = (&$body,&$body,&$body,&$body);
# Lexicals that the eval'ed snippets assign from @V and then reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411)   my ($a,$b,$c,$d,$e,$f,$g,$h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) 	&vld1_32	("{$T0}","[$Ktbl,:128]!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) 	&vrev32_8	(@X[0],@X[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 	 eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) 	&vadd_i32	($T0,$T0,@X[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) 	 foreach (@insns) { eval; }	# remaining instructions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) 	&vst1_32	("{$T0}","[$Xfer,:128]!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) 	push(@X,shift(@X));		# "rotate" X[]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 
# body_00_15(): return the scalar round for the NEON path as a list of Perl
# source strings instead of emitting it directly.  Xupdate and Xpreload
# eval the strings one at a time between NEON instructions.
#
# Strings joined with "." form a single list element, so the statements in
# them are evaluated together.  The first element copies @V into $a..$h.
# The last element advances the round counter $j, rotates @V and swaps
# $t2/$t3.  The conditional &ldr in the middle picks its source according
# to $j: the next stack slot, [$Ktbl] when $j==15, or [sp,#64] when $j==31.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) sub body_00_15 () {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 	(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) 	'($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) 	'&add	($h,$h,$t1)',			# h+=X[i]+K[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) 	'&eor	($t1,$f,$g)',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) 	'&eor	($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) 	'&add	($a,$a,$t2)',			# h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) 	'&and	($t1,$t1,$e)',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) 	'&eor	($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))',	# Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) 	'&eor	($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) 	'&eor	($t1,$t1,$g)',			# Ch(e,f,g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) 	'&add	($h,$h,$t2,"ror#$Sigma1[0]")',	# h+=Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) 	'&eor	($t2,$a,$b)',			# a^b, b^c in next round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) 	'&eor	($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))',	# Sigma0(a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) 	'&add	($h,$h,$t1)',			# h+=Ch(e,f,g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) 	'&ldr	($t1,sprintf "[sp,#%d]",4*(($j+1)&15))	if (($j&15)!=15);'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) 	'&ldr	($t1,"[$Ktbl]")				if ($j==15);'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) 	'&ldr	($t1,"[sp,#64]")			if ($j==31)',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) 	'&and	($t3,$t3,$t2)',			# (b^c)&=(a^b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 	'&add	($d,$d,$h)',			# d+=h
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) 	'&add	($h,$h,$t0,"ror#$Sigma0[0]");'.	# h+=Sigma0(a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) 	'&eor	($t3,$t3,$b)',			# Maj(a,b,c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) 	'$j++;	unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) 	)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) 
# Emit sha256_block_data_order_neon; the whole function is wrapped in
# "#if __ARM_MAX_ARCH__>=7".  The prologue generated here:
#   - pushes r4-r12,lr and moves sp down to a 16-byte aligned frame at
#     least 16*4+16 bytes below the pushed registers;
#   - uses [sp,#0..63] as the Xfer area (X[i]+K[i] words read by the scalar
#     rounds) and [sp,#64], [sp,#68], [sp,#72], [sp,#76] for $ctx, $inp,
#     the end-of-input pointer and the caller's sp respectively;
#   - loads the first 64-byte block into @X[0..3], byte-swaps each 32-bit
#     word, adds the first four K256 vectors and stores the sums to Xfer;
#   - loads the working variables from $ctx and branches to .L_00_48.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) #if __ARM_MAX_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) .arch	armv7-a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) .fpu	neon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) .global	sha256_block_data_order_neon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) .type	sha256_block_data_order_neon,%function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) .align	4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) sha256_block_data_order_neon:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) .LNEON:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) 	stmdb	sp!,{r4-r12,lr}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) 	sub	$H,sp,#16*4+16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) 	adr	$Ktbl,.Lsha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) 	sub	$Ktbl,$Ktbl,#.Lsha256_block_data_order-K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) 	bic	$H,$H,#15		@ align for 128-bit stores
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) 	mov	$t2,sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) 	mov	sp,$H			@ alloca
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) 	vld1.8		{@X[0]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) 	vld1.8		{@X[1]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) 	vld1.8		{@X[2]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) 	vld1.8		{@X[3]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) 	vld1.32		{$T0},[$Ktbl,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) 	vld1.32		{$T1},[$Ktbl,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) 	vld1.32		{$T2},[$Ktbl,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) 	vld1.32		{$T3},[$Ktbl,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) 	vrev32.8	@X[0],@X[0]		@ yes, even on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) 	str		$ctx,[sp,#64]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) 	vrev32.8	@X[1],@X[1]		@ big-endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) 	str		$inp,[sp,#68]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) 	mov		$Xfer,sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) 	vrev32.8	@X[2],@X[2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) 	str		$len,[sp,#72]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) 	vrev32.8	@X[3],@X[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) 	str		$t2,[sp,#76]		@ save original sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) 	vadd.i32	$T0,$T0,@X[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) 	vadd.i32	$T1,$T1,@X[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) 	vst1.32		{$T0},[$Xfer,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) 	vadd.i32	$T2,$T2,@X[2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) 	vst1.32		{$T1},[$Xfer,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) 	vadd.i32	$T3,$T3,@X[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) 	vst1.32		{$T2},[$Xfer,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) 	vst1.32		{$T3},[$Xfer,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) 	ldmia		$ctx,{$A-$H}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) 	sub		$Xfer,$Xfer,#64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) 	ldr		$t1,[sp,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) 	eor		$t2,$t2,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) 	eor		$t3,$B,$C
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) 	b		.L_00_48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) .align	4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) .L_00_48:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) ___
# Body of the .L_00_48 loop: four Xupdate passes, each handed a reference
# to body_00_15.  Xupdate is defined outside this section.
# NOTE(review): presumably each pass interleaves four scalar rounds with
# the NEON message-schedule update of one @X vector -- confirm there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) 	&Xupdate(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) 	&Xupdate(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) 	&Xupdate(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) 	&Xupdate(\&body_00_15);
# Loop tail.  $t1 is tested against zero as the K256 end-of-table marker
# (body_00_15 loads the word at [$Ktbl] into $t1 when $j==15; this assumes
# $j is 0 before the first Xupdate -- TODO confirm).  After the loop exits,
# $Ktbl is rewound by 256 bytes and the next input block is loaded.  When
# $inp already equals the end pointer saved at [sp,#72], $inp is stepped
# back by 64 before the loads ("avoid SEGV") and the saved $inp at
# [sp,#68] is left unchanged.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) 	teq	$t1,#0				@ check for K256 terminator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) 	ldr	$t1,[sp,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) 	sub	$Xfer,$Xfer,#64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) 	bne	.L_00_48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) 	ldr		$inp,[sp,#68]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) 	ldr		$t0,[sp,#72]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) 	sub		$Ktbl,$Ktbl,#256	@ rewind $Ktbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) 	teq		$inp,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) 	it		eq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) 	subeq		$inp,$inp,#64		@ avoid SEGV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) 	vld1.8		{@X[0]},[$inp]!		@ load next input block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) 	vld1.8		{@X[1]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) 	vld1.8		{@X[2]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) 	vld1.8		{@X[3]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) 	it		ne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) 	strne		$inp,[sp,#68]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) 	mov		$Xfer,sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) ___
# Four Xpreload passes, each handed a reference to body_00_15.  When $j
# reaches 31 the round's load fetches the word at [sp,#64] (the saved $ctx)
# into $t1; the epilogue below addresses the hash state through $t1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) 	&Xpreload(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) 	&Xpreload(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) 	&Xpreload(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) 	&Xpreload(\&body_00_15);
# Epilogue: add the eight words at [$t1] to the working variables and store
# the sums back.  On NE, $Xfer/$t1/$t2/$t3 are re-initialised and control
# returns to .L_00_48; on EQ the caller's sp is reloaded from [sp,#76] and
# r4-r12,pc are popped.
# NOTE(review): the condition consumed by "ittte ne" appears to be the one
# left by "teq $inp,$t0" in the loop tail; no flag-setting instruction is
# visible in between -- confirm against the emitter helpers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) 	ldr	$t0,[$t1,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) 	add	$A,$A,$t2			@ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) 	ldr	$t2,[$t1,#4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) 	ldr	$t3,[$t1,#8]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) 	ldr	$t4,[$t1,#12]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) 	add	$A,$A,$t0			@ accumulate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) 	ldr	$t0,[$t1,#16]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) 	add	$B,$B,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) 	ldr	$t2,[$t1,#20]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) 	add	$C,$C,$t3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) 	ldr	$t3,[$t1,#24]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) 	add	$D,$D,$t4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) 	ldr	$t4,[$t1,#28]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) 	add	$E,$E,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) 	str	$A,[$t1],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) 	add	$F,$F,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) 	str	$B,[$t1],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) 	add	$G,$G,$t3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) 	str	$C,[$t1],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) 	add	$H,$H,$t4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) 	str	$D,[$t1],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) 	stmia	$t1,{$E-$H}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) 	ittte	ne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) 	movne	$Xfer,sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) 	ldrne	$t1,[sp,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) 	eorne	$t2,$t2,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) 	ldreq	sp,[sp,#76]			@ restore original sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) 	itt	ne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) 	eorne	$t3,$B,$C
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) 	bne	.L_00_48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) 	ldmia	sp!,{r4-r12,pc}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) .size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) }}}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) ######################################################################
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) # ARMv8 stuff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) #
# Emit sha256_block_data_order_armv8, built on the sha256h / sha256h2 /
# sha256su0 / sha256su1 instructions.  The section is guarded by
# "__ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)", so it is preprocessed away
# whenever __KERNEL__ is defined.
# Register use: q0/q1 hold the state ($ABCD/$EFGH), q2 a copy of ABCD taken
# before each sha256h, q8-q11 the message block, q12/q13 alternate as the
# K256+message operand, q14/q15 keep the state from before the block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) {{{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) my @MSG=map("q$_",(8..11));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) my $Ktbl="r3";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) 
# Function entry and top of .Loop_v8: load the state, point $Ktbl at K256,
# turn $len into an end-of-input pointer, then per block load and byte-swap
# 64 bytes of input and save the incoming state.  The "teq $inp,$len" here
# sets the condition used by the "bne .Loop_v8" at the bottom of the loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) # ifdef __thumb2__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) #  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) #  define INST(a,b,c,d)	.byte	a,b,c,d
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) .type	sha256_block_data_order_armv8,%function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) .align	5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) sha256_block_data_order_armv8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) .LARMv8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) 	vld1.32	{$ABCD,$EFGH},[$ctx]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) # ifdef __thumb2__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) 	adr	$Ktbl,.LARMv8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) 	sub	$Ktbl,$Ktbl,#.LARMv8-K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) 	adrl	$Ktbl,K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) 	add	$len,$inp,$len,lsl#6	@ len to point at the end of inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) .Loop_v8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) 	vld1.8		{@MSG[0]-@MSG[1]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) 	vld1.8		{@MSG[2]-@MSG[3]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) 	vld1.32		{$W0},[$Ktbl]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) 	vrev32.8	@MSG[0],@MSG[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) 	vrev32.8	@MSG[1],@MSG[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) 	vrev32.8	@MSG[2],@MSG[2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) 	vrev32.8	@MSG[3],@MSG[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) 	vmov		$ABCD_SAVE,$ABCD	@ offload
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) 	vmov		$EFGH_SAVE,$EFGH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) 	teq		$inp,$len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) ___
# Twelve unrolled groups that include the message-schedule update
# (sha256su0/sha256su1).  Each group loads the next K256 vector into $W1
# while $W0 (K256 + @MSG[0]) feeds sha256h/sha256h2.  The Perl-side swap of
# $W0/$W1 and the rotation of @MSG after every group rename the registers
# for the next one; no move instructions are emitted for that.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) for($i=0;$i<12;$i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) 	vld1.32		{$W1},[$Ktbl]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) 	vadd.i32	$W0,$W0,@MSG[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) 	sha256su0	@MSG[0],@MSG[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) 	vmov		$abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) 	sha256h		$ABCD,$EFGH,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) 	sha256h2	$EFGH,$abcd,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) 	sha256su1	@MSG[0],@MSG[2],@MSG[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) 	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) }
# Final four groups, without schedule update.  Of the sixteen K256 vector
# loads per block, fifteen post-increment $Ktbl by 16 bytes and the last
# does not, hence the rewind by 256-16.  The saved state is then added back
# in, the loop repeats while $inp != $len, and the state is stored to $ctx.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) 	vld1.32		{$W1},[$Ktbl]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) 	vadd.i32	$W0,$W0,@MSG[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) 	vmov		$abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) 	sha256h		$ABCD,$EFGH,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) 	sha256h2	$EFGH,$abcd,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) 	vld1.32		{$W0},[$Ktbl]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) 	vadd.i32	$W1,$W1,@MSG[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) 	vmov		$abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) 	sha256h		$ABCD,$EFGH,$W1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) 	sha256h2	$EFGH,$abcd,$W1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) 	vld1.32		{$W1},[$Ktbl]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) 	vadd.i32	$W0,$W0,@MSG[2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) 	sub		$Ktbl,$Ktbl,#256-16	@ rewind
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) 	vmov		$abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) 	sha256h		$ABCD,$EFGH,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) 	sha256h2	$EFGH,$abcd,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) 	vadd.i32	$W1,$W1,@MSG[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) 	vmov		$abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) 	sha256h		$ABCD,$EFGH,$W1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) 	sha256h2	$EFGH,$abcd,$W1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) 	vadd.i32	$ABCD,$ABCD,$ABCD_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) 	vadd.i32	$EFGH,$EFGH,$EFGH_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) 	it		ne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) 	bne		.Loop_v8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) 	vst1.32		{$ABCD,$EFGH},[$ctx]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) 	ret		@ bx lr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) .size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) }}}
# Trailer appended to the generated assembly: an identification string,
# re-alignment, and the OPENSSL_armcap_P common symbol.  The symbol is only
# declared when __KERNEL__ is not defined.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) .align	2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) .comm   OPENSSL_armcap_P,4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) 
# Copy this script's own leading comment block (licence and design notes)
# into the generated assembly, turning Perl '#' comments into ARM '@' ones.
#
# The three-argument open with a lexical handle guarantees that the path in
# $0 is treated purely as a file name, never as a mode or pipe specifier.
# A failed open is reported on STDERR but is not fatal: the output then
# merely lacks the header, which is what the unchecked open did silently.
if (open(my $self, '<', $0)) {
	while (<$self>) {
		next if (/^#!/);		# drop the shebang line
		# Stop at the first line that is neither a comment nor blank;
		# the s/// rewrites the leading '#' as a side effect of the test.
		last if (!s/^#/@/ and !/^$/);
		print;
	}
	close $self;
} else {
	warn "$0: cannot reopen script to copy its header: $!\n";
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) 
{   my  %opcode = (
	"sha256h"	=> 0xf3000c40,	"sha256h2"	=> 0xf3100c40,
	"sha256su0"	=> 0xf3ba03c0,	"sha256su1"	=> 0xf3200c40	);

    # unsha256($mnemonic, $arg)
    #
    # Hand-assemble one SHA-256 crypto instruction so that the output can be
    # built by assemblers that do not know the mnemonics.
    #
    #   $mnemonic  one of the keys of %opcode
    #   $arg       operand text, "qD,qM" or "qD,qN,qM"
    #
    # Returns "INST(b0,b1,b2,b3)\t@ <mnemonic> <operands>", with the four
    # bytes of the 32-bit encoding listed least-significant first.  The
    # INST() macro emitted alongside the ARMv8 code lays those bytes out for
    # ARM or Thumb-2.
    #
    # If the mnemonic is unknown or the operands cannot be parsed, the
    # instruction text is returned unchanged, so the assembler reports it.
    # Falling off the end of the sub would instead make the caller's s///e
    # replace the instruction with an empty string.
    sub unsha256 {
	my ($mnemonic,$arg)=@_;

	if (exists($opcode{$mnemonic}) &&
	    $arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/) {
	    # The middle operand is absent in the two-operand form; encode
	    # it as 0 explicitly instead of relying on undef & 7.
	    my ($qd,$qn,$qm) = ($1, defined($2) ? $2 : 0, $3);

	    # Each register number is split into its low three bits and its
	    # bit 3, which are placed in separate fields of the encoding.
	    my $word = $opcode{$mnemonic}|(($qd&7)<<13)|(($qd&8)<<19)
					 |(($qn&7)<<17)|(($qn&8)<<4)
					 |(($qm&7)<<1) |(($qm&8)<<2);

	    # ARMv7 instructions are always encoded little-endian, so the
	    # word is emitted byte by byte.  The correct solution is to use
	    # the .inst directive, but older assemblers don't implement it.
	    return sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
			$word&0xff,($word>>8)&0xff,
			($word>>16)&0xff,($word>>24)&0xff,
			$mnemonic,$arg;
	}

	return "$mnemonic\t$arg";
    }
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) 
# Post-process the accumulated assembly one line at a time and print it.
# The /o modifiers were dropped: none of the patterns interpolates a
# variable, so they had no effect.
foreach (split($/,$code)) {

	# expand `...`-quoted Perl expressions embedded in the template
	s/\`([^\`]*)\`/eval $1/ge;

	# replace SHA-256 crypto mnemonics with hand-encoded INST() bytes
	s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/ge;

	# "ret" becomes "bx lr"; a "bx lr" written literally in the template
	# becomes its raw encoding instead.
	s/\bret\b/bx	lr/g		or
	s/\bbx\s+lr\b/.word\t0xe12fff1e/g;	# make it possible to compile with -march=armv4

	print $_,"\n";
}

# Closing STDOUT forces the flush.  Buffered write errors (full disk,
# closed pipe) only surface here, so a failure must abort the run instead
# of leaving a truncated file behind with a zero exit status.
close STDOUT or die "error closing STDOUT: $!";