^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) #!/usr/bin/env perl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) # SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) # This code is taken from the OpenSSL project but the author (Andy Polyakov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) # has relicensed it under the GPLv2. Therefore this program is free software;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) # you can redistribute it and/or modify it under the terms of the GNU General
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) # Public License version 2 as published by the Free Software Foundation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) # The original headers, including the original license headers, are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) # included below for completeness.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) # ====================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) # project. The module is, however, dual licensed under OpenSSL and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) # CRYPTOGAMS licenses depending on where you obtain it. For further
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) # details see https://www.openssl.org/~appro/cryptogams/.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) # ====================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) # SHA256 block procedure for ARMv4. May 2007.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) # Performance is ~2x better than gcc 3.4 generated code and in "abso-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) # lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) # byte [on single-issue Xscale PXA250 core].
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) # July 2010.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) # Rescheduling for dual-issue pipeline resulted in 22% improvement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) # Cortex A8 core and ~20 cycles per processed byte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) # February 2011.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) # Profiler-assisted and platform-specific optimization resulted in 16%
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) # improvement on Cortex A8 core and ~15.4 cycles per processed byte.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) # September 2013.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) # Add NEON implementation. On Cortex A8 it was measured to process one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) # byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) # S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) # code (meaning that latter performs sub-optimally, nothing was done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) # about it).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) # May 2014.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) # Add ARMv8 code path performing at 2.0 cpb on Apple A7.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
# Pick the output file from the command line: discard leading arguments
# until one looks like "name.ext" (word characters and '-', one dot).
# When the arguments run out, $output is left false.
while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}

# Redirect STDOUT only when a file name was actually found.  Without one the
# generated code keeps going to the inherited STDOUT, so invoking the script
# with a shell redirect still works.  A failed open is fatal instead of being
# silently ignored (which used to send the output to the wrong place).
if (defined($output) && $output ne "") {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
# Integer register allocation (names of ARM core registers).
# r0..r2 are named ctx, inp and len, and r3 is $T1; the same four registers
# are reused under the scratch names $t0, $t4, $t1 and $t3.
($ctx, $inp, $len, $T1) = ("r0", "r1", "r2", "r3");
($t0,  $t4,  $t1,  $t3) = ("r0", "r1", "r2", "r3");

# The eight SHA-256 working variables a..h live in r4..r11; @V lists them in
# that order and is rotated by the round generators.
($A, $B, $C, $D, $E, $F, $G, $H) = map { "r$_" } (4 .. 11);
@V = ($A, $B, $C, $D, $E, $F, $G, $H);

$t2   = "r12";          # one more scratch register
$Ktbl = "r14";          # pointer into the K256 constant table

# Shift amounts of the SHA-256 functions.  All three Sigma entries are used
# as rotates; for sigma0/sigma1 the first two are rotates and the third is a
# logical right shift.
@Sigma0 = ( 2, 13, 22);
@Sigma1 = ( 6, 11, 25);
@sigma0 = ( 7, 18,  3);
@sigma1 = (17, 19, 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
# BODY_00_15($i,$a,$b,$c,$d,$e,$f,$g,$h)
# Append the assembly for one SHA-256 round of the integer-only code path
# to $code.
#   $i      round index; it is interpolated into the emitted text, where the
#           "#if $i==15"-style lines become C preprocessor conditionals that
#           are resolved when the generated file is preprocessed
#   $a..$h  names of the registers currently playing the roles a..h
# Maj(a,b,c) is computed here but not added to h (see the commented-out add
# at the end of the emitted text); the next round adds it through $t2
# ("h+=Maj(a,b,c) from the past").
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) sub BODY_00_15 {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73)
# Rounds 0..15 only: finish fetching the input word into $t1 and start
# Sigma1(e).  The word load itself was already issued as a prefetch (hence
# the commented-out ldr/ldrb on the first line of each branch).  ARMv7+
# loads a whole word and byte-swaps it unless __ARMEB__ is defined; older
# cores assemble the big-endian word from four byte loads.  In round 15 the
# advanced input pointer is saved to the stack so its register can be reused
# as $t4.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) $code.=<<___ if ($i<16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) #if __ARM_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) @ ldr $t1,[$inp],#4 @ $i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) # if $i==15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) str $inp,[sp,#17*4] @ make room for $t4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) # ifndef __ARMEB__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) rev $t1,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) @ ldrb $t1,[$inp,#3] @ $i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) ldrb $t2,[$inp,#2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) ldrb $t0,[$inp,#1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) orr $t1,$t1,$t2,lsl#8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) ldrb $t2,[$inp],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) orr $t1,$t1,$t0,lsl#16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) # if $i==15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) str $inp,[sp,#17*4] @ make room for $t4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]`
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) orr $t1,$t1,$t2,lsl#24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) ___
# Common part of every round: h += X[i] + Sigma1(e) + K256[i] + Ch(e,f,g),
# d += h, h += Sigma0(a); X[i] is also stored into the 16-word buffer on the
# stack.  For $i==31 the low byte of the K256 word just loaded is compared
# with 0xf2, the low byte of the last table entry, which sets the flags the
# loop code after the rounds branches on.  For $i<15 the next input word is
# prefetched; otherwise the two schedule words the next BODY_16_XX needs are
# preloaded into $t1 and $t4.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) ldr $t2,[$Ktbl],#4 @ *K256++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) add $h,$h,$t1 @ h+=X[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) str $t1,[sp,#`$i%16`*4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) eor $t1,$f,$g
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) add $h,$h,$t0,ror#$Sigma1[0] @ h+=Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) and $t1,$t1,$e
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) add $h,$h,$t2 @ h+=K256[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) eor $t1,$t1,$g @ Ch(e,f,g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) eor $t0,$a,$a,ror#`$Sigma0[1]-$Sigma0[0]`
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) add $h,$h,$t1 @ h+=Ch(e,f,g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) #if $i==31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) and $t2,$t2,#0xff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) cmp $t2,#0xf2 @ done?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) #if $i<15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) # if __ARM_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) ldr $t1,[$inp],#4 @ prefetch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) ldrb $t1,[$inp,#3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) eor $t2,$a,$b @ a^b, b^c in next round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) ldr $t1,[sp,#`($i+2)%16`*4] @ from future BODY_16_xx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) eor $t2,$a,$b @ a^b, b^c in next round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) ldr $t4,[sp,#`($i+15)%16`*4] @ from future BODY_16_xx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) eor $t0,$t0,$a,ror#`$Sigma0[2]-$Sigma0[0]` @ Sigma0(a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) and $t3,$t3,$t2 @ (b^c)&=(a^b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) add $d,$d,$h @ d+=h
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) eor $t3,$t3,$b @ Maj(a,b,c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) add $h,$h,$t0,ror#$Sigma0[0] @ h+=Sigma0(a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) @ add $h,$h,$t3 @ h+=Maj(a,b,c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ___
# Exchange the two register names for the next call: the register that now
# holds Maj(a,b,c) becomes $t2 (added "from the past"), and the register
# holding a^b becomes $t3, where the next round uses it as its b^c.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) ($t2,$t3)=($t3,$t2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
# BODY_16_XX($i,$a,$b,$c,$d,$e,$f,$g,$h)
# Append the assembly for one round with $i>=16: first the message-schedule
# update, then the common round body via BODY_00_15 (same arguments).
# The emitted code forms
#   X[i] += sigma0(X[i+1]) + sigma1(X[i+14]) + X[i+9]
# with all indices taken modulo 16 in the stack buffer, and leaves the result
# in $t1.  The two leading loads are commented out in the emitted text
# because the previous round already preloaded $t1 and $t4.  The start of
# Sigma1(e) is emitted here because BODY_00_15 only emits it for $i<16.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) sub BODY_16_XX {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) my ($i,$a,$b,$c,$d,$e,$f,$g,$h) = @_;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) @ ldr $t1,[sp,#`($i+1)%16`*4] @ $i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) @ ldr $t4,[sp,#`($i+14)%16`*4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) mov $t0,$t1,ror#$sigma0[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) add $a,$a,$t2 @ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) mov $t2,$t4,ror#$sigma1[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) eor $t0,$t0,$t1,ror#$sigma0[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) eor $t2,$t2,$t4,ror#$sigma1[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) eor $t0,$t0,$t1,lsr#$sigma0[2] @ sigma0(X[i+1])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) ldr $t1,[sp,#`($i+0)%16`*4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) eor $t2,$t2,$t4,lsr#$sigma1[2] @ sigma1(X[i+14])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) ldr $t4,[sp,#`($i+9)%16`*4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) add $t2,$t2,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) eor $t0,$e,$e,ror#`$Sigma1[1]-$Sigma1[0]` @ from BODY_00_15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) add $t1,$t1,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) eor $t0,$t0,$e,ror#`$Sigma1[2]-$Sigma1[0]` @ Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) add $t1,$t1,$t4 @ X[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) &BODY_00_15(@_);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163)
# Start of the generated file: preprocessor glue, the K256 constant table and
# the entry of the integer-only sha256_block_data_order, up to the first
# input-word load of the per-block loop.
#  - Outside the kernel (__KERNEL__ undefined) the entry code reads
#    OPENSSL_armcap_P and branches to .LARMv8 or .LNEON when the matching
#    capability bit is set.
#  - K256 is 64 words (256 bytes) followed by a zero terminator word and,
#    outside the kernel, the .LOPENSSL_armcap word; both the table and the
#    function entry are ".align 5", so the table starts 256+32 bytes before
#    the entry address held in r3.
#  - len is a count of 64-byte blocks and is turned into an end pointer.
#    ctx, inp and the end pointer are pushed together with r4-r11 and lr, and
#    16 more words are reserved below them for X[]; the saved values are
#    therefore reachable at [sp,#16*4], [sp,#17*4] and [sp,#18*4].
#  - $t3 is seeded with b^c and $t2 is zeroed, which is what the first round
#    expects to find there ("a^b from the previous round" and "Maj from the
#    past").
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) $code=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) #ifndef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) # include "arm_arch.h"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) # define __ARM_ARCH__ __LINUX_ARM_ARCH__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) # define __ARM_MAX_ARCH__ 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) #if __ARM_ARCH__<7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) .code 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) .syntax unified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) # ifdef __thumb2__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) .thumb
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) .code 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) .type K256,%object
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) K256:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) .word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) .word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) .word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) .word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) .word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) .word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) .word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) .word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) .word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) .word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) .word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) .word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) .word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) .word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) .size K256,.-K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) .word 0 @ terminator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) .LOPENSSL_armcap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) .word OPENSSL_armcap_P-sha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) .global sha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) .type sha256_block_data_order,%function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) sha256_block_data_order:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) .Lsha256_block_data_order:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) #if __ARM_ARCH__<7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) sub r3,pc,#8 @ sha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) adr r3,.Lsha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) ldr r12,.LOPENSSL_armcap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) ldr r12,[r3,r12] @ OPENSSL_armcap_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) tst r12,#ARMV8_SHA256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) bne .LARMv8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) tst r12,#ARMV7_NEON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) bne .LNEON
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) add $len,$inp,$len,lsl#6 @ len to point at the end of inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) stmdb sp!,{$ctx,$inp,$len,r4-r11,lr}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) ldmia $ctx,{$A,$B,$C,$D,$E,$F,$G,$H}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) sub $Ktbl,r3,#256+32 @ K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) sub sp,sp,#16*4 @ alloca(X[16])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) .Loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) # if __ARM_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) ldr $t1,[$inp],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) ldrb $t1,[$inp,#3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) eor $t3,$B,$C @ magic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) eor $t2,$t2,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) ___
# Rounds 0..15, fully unrolled.  After every round the register names in @V
# are rotated by one position (last element moves to the front), so the
# roles a..h advance without any data being moved between registers.
$i = 0;
while ($i < 16) {
    &BODY_00_15($i, @V);
    unshift(@V, pop(@V));
    $i++;
}

# Rounds 16..31 are emitted once, under this label; the code generated
# after them branches back to the label until the round-31 comparison
# reports the end of K256.
$code .= ".Lrounds_16_xx:\n";
while ($i < 32) {
    &BODY_16_XX($i, @V);
    unshift(@V, pop(@V));
    $i++;
}
# Tail of the integer-only function, emitted after the unrolled rounds.
#  - The flags come from the "cmp ...,#0xf2" emitted in round 31: when not
#    equal, branch back to .Lrounds_16_xx; when equal, reload the ctx pointer
#    from [sp,#16*4].  The "ite eq" is only emitted for __ARM_ARCH__>=7
#    (needed by Thumb-2, as its own comment notes).
#  - The pending Maj(a,b,c) is added to a, the eight working variables are
#    added to the state words loaded from ctx, and the sums are stored back.
#  - inp and the end pointer are reloaded from the stack; Ktbl is rewound by
#    256 bytes and the block loop repeats until inp reaches the end pointer.
#  - The frame of 16 X[] words plus the three saved ctx/inp/end words is
#    dropped before the callee-saved registers are restored.  For
#    __ARM_ARCH__<5 the return uses "moveq pc,lr" when bit 0 of lr is clear
#    and "bx lr" otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) #if __ARM_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) ite eq @ Thumb2 thing, sanity check in ARM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) ldreq $t3,[sp,#16*4] @ pull ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) bne .Lrounds_16_xx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) ldr $t0,[$t3,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) ldr $t1,[$t3,#4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) ldr $t2,[$t3,#8]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) add $A,$A,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) ldr $t0,[$t3,#12]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) add $B,$B,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) ldr $t1,[$t3,#16]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) add $C,$C,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) ldr $t2,[$t3,#20]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) add $D,$D,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) ldr $t0,[$t3,#24]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) add $E,$E,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) ldr $t1,[$t3,#28]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) add $F,$F,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) ldr $inp,[sp,#17*4] @ pull inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) ldr $t2,[sp,#18*4] @ pull inp+len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) add $G,$G,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) add $H,$H,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) stmia $t3,{$A,$B,$C,$D,$E,$F,$G,$H}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) cmp $inp,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) sub $Ktbl,$Ktbl,#256 @ rewind Ktbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) bne .Loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) add sp,sp,#`16+3`*4 @ destroy frame
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) #if __ARM_ARCH__>=5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) ldmia sp!,{r4-r11,pc}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) ldmia sp!,{r4-r11,lr}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) tst lr,#1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) moveq pc,lr @ be binary compatible with V4, yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) bx lr @ interoperable with Thumb ISA:-)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) .size sha256_block_data_order,.-sha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) ######################################################################
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) # NEON stuff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) {{{
# NEON register names used by the generators below.
my @X = ("q0", "q1", "q2", "q3");      # message schedule, four words per q register
my $T0 = "q8";                         # q-sized temporaries
my $T1 = "q9";
my $T2 = "q10";
my $T3 = "q11";
my $T4 = "d24";                        # d-sized temporaries
my $T5 = "d25";
my $Xfer = $t4;                        # store pointer for the X[i]+K[i] sums
my $j = 0;                             # round counter advanced by body_00_15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
# Dlo(qN): name of the lower d register of a NEON q register, i.e. d(2*N).
# Returns "" when the argument contains no q-register name.
sub Dlo() {
    my ($qreg) = @_;
    if ($qreg =~ m|q([1]?[0-9])|) {
        return "d" . ($1 * 2);
    }
    return "";
}
# Dhi(qN): name of the upper d register of a NEON q register, i.e. d(2*N+1).
# Returns "" when the argument contains no q-register name.
sub Dhi() {
    my ($qreg) = @_;
    if ($qreg =~ m|q([1]?[0-9])|) {
        return "d" . ($1 * 2 + 1);
    }
    return "";
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298)
# thunk [simplified] x86-style perlasm
# Any call to an undefined sub lands here and is turned into one line of
# assembly appended to $code: the sub name is the mnemonic (its first '_'
# becomes '.', so vadd_i32 is emitted as vadd.i32), the arguments are the
# comma-separated operands, and a purely numeric last operand is emitted as
# an immediate with a leading '#'.
sub AUTOLOAD()
{
    my @operands = @_;
    (my $mnemonic = $AUTOLOAD) =~ s/.*:://;     # drop the package qualifier
    $mnemonic =~ s/_/\./;

    my $last = pop(@operands);
    if ($last*1 eq $last) {
        $last = "#$last";
    }

    $code .= "\t" . $mnemonic . "\t" . join(',', @operands, $last) . "\n";
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305)
# Xupdate($body)
# Emit the NEON message-schedule update for four words, interleaved with the
# scalar instructions of four rounds.
#   $body  code ref returning a list of Perl snippet strings; it is called
#          four times and the snippets are eval'ed one at a time, in order,
#          between the NEON instructions emitted here
# The NEON sequence updates @X[0] in place (adding X[9..12], sigma0 of
# X[1..4] and, in two halves, sigma1 of the two preceding words), loads the
# next four K256 words through $Ktbl, adds them to the updated @X[0] and
# stores the sums through $Xfer.  Finally @X is rotated so that @X[0] names
# the oldest four words again.
# The lexicals $a..$h declared here are the variables the eval'ed snippets
# assign and read.  The statement order below is the instruction schedule.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) sub Xupdate()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) { use integer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) my $body = shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) my @insns = (&$body,&$body,&$body,&$body);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) my ($a,$b,$c,$d,$e,$f,$g,$h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) &vext_8 ($T0,@X[0],@X[1],4); # X[1..4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) &vext_8 ($T1,@X[2],@X[3],4); # X[9..12]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) &vshr_u32 ($T2,$T0,$sigma0[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += X[9..12]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) &vshr_u32 ($T1,$T0,$sigma0[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) &vsli_32 ($T2,$T0,32-$sigma0[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) &vshr_u32 ($T3,$T0,$sigma0[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) &veor ($T1,$T1,$T2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) &vsli_32 ($T3,$T0,32-$sigma0[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) &veor ($T1,$T1,$T3); # sigma0(X[1..4])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) &vshr_u32 ($T5,&Dhi(@X[3]),$sigma1[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) &vadd_i32 (@X[0],@X[0],$T1); # X[0..3] += sigma0(X[1..4])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) &veor ($T5,$T5,$T4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) &vshr_u32 ($T4,&Dhi(@X[3]),$sigma1[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) &vsli_32 ($T4,&Dhi(@X[3]),32-$sigma1[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) &veor ($T5,$T5,$T4); # sigma1(X[14..15])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) &vadd_i32 (&Dlo(@X[0]),&Dlo(@X[0]),$T5);# X[0..1] += sigma1(X[14..15])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) &vshr_u32 ($T5,&Dlo(@X[0]),$sigma1[2]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) &veor ($T5,$T5,$T4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) &vshr_u32 ($T4,&Dlo(@X[0]),$sigma1[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) &vld1_32 ("{$T0}","[$Ktbl,:128]!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) &vsli_32 ($T4,&Dlo(@X[0]),32-$sigma1[1]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) &veor ($T5,$T5,$T4); # sigma1(X[16..17])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) &vadd_i32 (&Dhi(@X[0]),&Dhi(@X[0]),$T5);# X[2..3] += sigma1(X[16..17])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) &vadd_i32 ($T0,$T0,@X[0]);
# Drain the remaining scalar snippets, keeping the last two for after the
# store below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) while($#insns>=2) { eval(shift(@insns)); }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) &vst1_32 ("{$T0}","[$Xfer,:128]!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) push(@X,shift(@X)); # "rotate" X[]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406)
# Xpreload($body)
# Emit the NEON code that prepares four input words held in @X[0] and
# interleave it with the scalar instructions of four rounds.
#   $body  code ref returning a list of Perl snippet strings; it is called
#          four times and the snippets are eval'ed in order
# The NEON part loads four K256 words through $Ktbl, reverses the bytes of
# each 32-bit lane of @X[0] (vrev32.8), adds the two and stores the sums
# through $Xfer.  All scalar snippets still pending are emitted before that
# store.  @X is rotated at the end, as in Xupdate.
# The lexicals $a..$h declared here are the variables the eval'ed snippets
# assign and read.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) sub Xpreload()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) { use integer;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) my $body = shift;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) my @insns = (&$body,&$body,&$body,&$body);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) my ($a,$b,$c,$d,$e,$f,$g,$h);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) &vld1_32 ("{$T0}","[$Ktbl,:128]!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) &vrev32_8 (@X[0],@X[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) eval(shift(@insns));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) &vadd_i32 ($T0,$T0,@X[0]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) foreach (@insns) { eval; } # remaining instructions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) &vst1_32 ("{$T0}","[$Xfer,:128]!");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) push(@X,shift(@X)); # "rotate" X[]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
# body_00_15()
# Return one SHA-256 round as a list of Perl snippet strings.  Each list
# element is one eval step for Xupdate/Xpreload; snippets joined with '.'
# form a single step.  The strings are single-quoted, so every variable in
# them is looked up when the snippet is eval'ed, not when this sub runs.
#  - The first step binds $a..$h to the current order of @V.
#  - $t1 is expected to hold X[i]+K[i] on entry.  The ldr step refills it for
#    the next round from [sp,#4*(($j+1)&15)], except that at $j==15 it loads
#    the word $Ktbl points at and at $j==31 it loads [sp,#64].
#  - The last step advances the round counter $j, rotates @V and exchanges
#    the names $t2/$t3, as the integer-only BODY_00_15 does.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) sub body_00_15 () {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) '($a,$b,$c,$d,$e,$f,$g,$h)=@V;'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) '&add ($h,$h,$t1)', # h+=X[i]+K[i]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) '&eor ($t1,$f,$g)',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) '&eor ($t0,$e,$e,"ror#".($Sigma1[1]-$Sigma1[0]))',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) '&add ($a,$a,$t2)', # h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) '&and ($t1,$t1,$e)',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) '&eor ($t2,$t0,$e,"ror#".($Sigma1[2]-$Sigma1[0]))', # Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) '&eor ($t0,$a,$a,"ror#".($Sigma0[1]-$Sigma0[0]))',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) '&eor ($t1,$t1,$g)', # Ch(e,f,g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) '&add ($h,$h,$t2,"ror#$Sigma1[0]")', # h+=Sigma1(e)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) '&eor ($t2,$a,$b)', # a^b, b^c in next round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) '&eor ($t0,$t0,$a,"ror#".($Sigma0[2]-$Sigma0[0]))', # Sigma0(a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) '&add ($h,$h,$t1)', # h+=Ch(e,f,g)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) '&ldr ($t1,sprintf "[sp,#%d]",4*(($j+1)&15)) if (($j&15)!=15);'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) '&ldr ($t1,"[$Ktbl]") if ($j==15);'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) '&ldr ($t1,"[sp,#64]") if ($j==31)',
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) '&and ($t3,$t3,$t2)', # (b^c)&=(a^b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) '&add ($d,$d,$h)', # d+=h
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) '&add ($h,$h,$t0,"ror#$Sigma0[0]");'. # h+=Sigma0(a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) '&eor ($t3,$t3,$b)', # Maj(a,b,c)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) '$j++; unshift(@V,pop(@V)); ($t2,$t3)=($t3,$t2);'
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459)
# Emit sha256_block_data_order_neon.  The heredocs below are assembler text
# with Perl variables interpolated; between them, Xupdate/Xpreload append the
# generated round code.
# This first part is the prologue: save r4-r12 and lr, carve an 80-byte
# frame below sp aligned down to 16 bytes, load and byte-swap the first
# 64-byte input block, add the first 16 K256 words to it and store the sums
# in the frame, then load the hash state into $A..$H.
# Frame layout as written by the stores below:
#   [sp,#0] .. [sp,#63]  X[i]+K[i] transfer area, written through $Xfer
#   [sp,#64]             $ctx
#   [sp,#68]             $inp
#   [sp,#72]             $len, already turned into the end-of-input pointer
#   [sp,#76]             original sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) #if __ARM_MAX_ARCH__>=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) .arch armv7-a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) .fpu neon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) .global sha256_block_data_order_neon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) .type sha256_block_data_order_neon,%function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) sha256_block_data_order_neon:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) .LNEON:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) stmdb sp!,{r4-r12,lr}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) sub $H,sp,#16*4+16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) adr $Ktbl,.Lsha256_block_data_order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) sub $Ktbl,$Ktbl,#.Lsha256_block_data_order-K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) bic $H,$H,#15 @ align for 128-bit stores
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) mov $t2,sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) mov sp,$H @ alloca
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) add $len,$inp,$len,lsl#6 @ len to point at the end of inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) vld1.8 {@X[0]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) vld1.8 {@X[1]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) vld1.8 {@X[2]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) vld1.8 {@X[3]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) vld1.32 {$T0},[$Ktbl,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) vld1.32 {$T1},[$Ktbl,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) vld1.32 {$T2},[$Ktbl,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) vld1.32 {$T3},[$Ktbl,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) vrev32.8 @X[0],@X[0] @ yes, even on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) str $ctx,[sp,#64]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) vrev32.8 @X[1],@X[1] @ big-endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) str $inp,[sp,#68]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) mov $Xfer,sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) vrev32.8 @X[2],@X[2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) str $len,[sp,#72]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) vrev32.8 @X[3],@X[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) str $t2,[sp,#76] @ save original sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) vadd.i32 $T0,$T0,@X[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) vadd.i32 $T1,$T1,@X[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) vst1.32 {$T0},[$Xfer,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) vadd.i32 $T2,$T2,@X[2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) vst1.32 {$T1},[$Xfer,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) vadd.i32 $T3,$T3,@X[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) vst1.32 {$T2},[$Xfer,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) vst1.32 {$T3},[$Xfer,:128]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) ldmia $ctx,{$A-$H}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) sub $Xfer,$Xfer,#64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) ldr $t1,[sp,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) eor $t2,$t2,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) eor $t3,$B,$C
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) b .L_00_48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) .L_00_48:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) ___
# Body of the .L_00_48 loop: four Xupdate calls, each handed the scalar
# round generator body_00_15.
# NOTE(review): Xupdate is defined outside this excerpt; presumably it
# interleaves the NEON message-schedule update with the scalar rounds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) &Xupdate(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) &Xupdate(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) &Xupdate(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) &Xupdate(\&body_00_15);
# Loop back to .L_00_48 while the word prefetched into $t1 is non-zero (a
# zero word marks the end of K256).  Afterwards rewind $Ktbl and load the
# next input block; when $inp already equals the end pointer it is stepped
# back by 64 first, so the loads re-read the last block instead of running
# past the input, and the saved $inp is left unchanged in that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) teq $t1,#0 @ check for K256 terminator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) ldr $t1,[sp,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) sub $Xfer,$Xfer,#64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) bne .L_00_48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) ldr $inp,[sp,#68]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) ldr $t0,[sp,#72]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) sub $Ktbl,$Ktbl,#256 @ rewind $Ktbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) teq $inp,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) it eq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) subeq $inp,$inp,#64 @ avoid SEGV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) vld1.8 {@X[0]},[$inp]! @ load next input block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) vld1.8 {@X[1]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) vld1.8 {@X[2]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) vld1.8 {@X[3]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) it ne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) strne $inp,[sp,#68]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) mov $Xfer,sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) ___
# Four Xpreload calls, again driven by body_00_15.  The visible tail of the
# sub above byte-swaps @X[0], adds it to $T0 and stores the sum through
# $Xfer, i.e. it prepares X[i]+K[i] for the block that was just loaded.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) &Xpreload(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) &Xpreload(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) &Xpreload(\&body_00_15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) &Xpreload(\&body_00_15);
# Epilogue of one block: add the eight working variables into the hash
# state addressed by $t1 (presumably the ctx pointer that body_00_15 reloads
# from [sp,#64] at $j==31) and store it back.  Then either reinitialise
# $Xfer/$t1/$t2/$t3 and branch to .L_00_48 for the next block, or restore
# the original sp and return.
# NOTE(review): the ne/eq conditions reuse the flags of the earlier
# "teq $inp,$t0"; this assumes nothing emitted by Xpreload alters the flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) ldr $t0,[$t1,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) add $A,$A,$t2 @ h+=Maj(a,b,c) from the past
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) ldr $t2,[$t1,#4]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) ldr $t3,[$t1,#8]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) ldr $t4,[$t1,#12]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) add $A,$A,$t0 @ accumulate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) ldr $t0,[$t1,#16]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) add $B,$B,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) ldr $t2,[$t1,#20]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) add $C,$C,$t3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) ldr $t3,[$t1,#24]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) add $D,$D,$t4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) ldr $t4,[$t1,#28]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) add $E,$E,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) str $A,[$t1],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) add $F,$F,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) str $B,[$t1],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) add $G,$G,$t3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) str $C,[$t1],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) add $H,$H,$t4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) str $D,[$t1],#4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) stmia $t1,{$E-$H}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) ittte ne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) movne $Xfer,sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) ldrne $t1,[sp,#0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) eorne $t2,$t2,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) ldreq sp,[sp,#76] @ restore original sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) itt ne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) eorne $t3,$B,$C
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) bne .L_00_48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) ldmia sp!,{r4-r12,pc}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) .size sha256_block_data_order_neon,.-sha256_block_data_order_neon
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) }}}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) ######################################################################
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) # ARMv8 stuff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) #
# Emit sha256_block_data_order_armv8, the code path built on the dedicated
# SHA-256 instructions (sha256h, sha256h2, sha256su0, sha256su1).  The
# mnemonics are written symbolically here; the output loop at the end of
# the file replaces them with INST() byte sequences via unsha256.
# NOTE(review): $ctx, $inp and $len are defined outside this excerpt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) {{{
# q0/q1 hold the hash state, q2 is a scratch copy of ABCD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
# q8-q11 hold the 64-byte message block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) my @MSG=map("q$_",(8..11));
# q12/q13 alternate as the W+K operand; q14/q15 keep the state as it was
# at loop entry so it can be added back at the end of the block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) my $Ktbl="r3";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590)
# Prologue and top of the per-block loop: load the state, locate K256,
# turn $len into the end-of-input pointer, then per block load and
# byte-swap 64 bytes and save the incoming state.  INST() emits the four
# opcode bytes as data; its Thumb-2 variant emits the upper halfword first
# and ORs 0xc into the top byte.  The teq at the end sets the flags that the
# bne at the bottom of the loop consumes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) # ifdef __thumb2__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) # define INST(a,b,c,d) .byte c,d|0xc,a,b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) # define INST(a,b,c,d) .byte a,b,c,d
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) .type sha256_block_data_order_armv8,%function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) sha256_block_data_order_armv8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) .LARMv8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) vld1.32 {$ABCD,$EFGH},[$ctx]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) # ifdef __thumb2__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) adr $Ktbl,.LARMv8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) sub $Ktbl,$Ktbl,#.LARMv8-K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) adrl $Ktbl,K256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) add $len,$inp,$len,lsl#6 @ len to point at the end of inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) .Loop_v8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) vld1.8 {@MSG[0]-@MSG[1]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) vld1.8 {@MSG[2]-@MSG[3]},[$inp]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) vld1.32 {$W0},[$Ktbl]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) vrev32.8 @MSG[0],@MSG[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) vrev32.8 @MSG[1],@MSG[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) vrev32.8 @MSG[2],@MSG[2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) vrev32.8 @MSG[3],@MSG[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) vmov $ABCD_SAVE,$ABCD @ offload
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) vmov $EFGH_SAVE,$EFGH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) teq $inp,$len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) ___
# Twelve groups that also update the message schedule.  Each group
# prefetches the next 16 bytes of K256 into the idle W register, adds the
# current message register to the other one, and feeds that sum to
# sha256h/sha256h2, with sha256su0/sha256su1 rewriting @MSG[0] around them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) for($i=0;$i<12;$i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) vld1.32 {$W1},[$Ktbl]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) vadd.i32 $W0,$W0,@MSG[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) sha256su0 @MSG[0],@MSG[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) vmov $abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) sha256h $ABCD,$EFGH,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) sha256h2 $EFGH,$abcd,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) sha256su1 @MSG[0],@MSG[2],@MSG[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) ___
# Swap the two W registers and rotate the message registers so the next
# group is generated with the roles shifted by one.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) }
# Final four groups: no schedule update is emitted.  The last K256 load has
# no write-back, so $Ktbl has advanced by 15*16 bytes in total and is
# rewound by 256-16.  The saved state is then added back, the loop repeats
# while $inp differs from the end pointer, and the state is stored to $ctx.
# The trailing "ret" is rewritten to "bx lr" by the output loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) vld1.32 {$W1},[$Ktbl]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) vadd.i32 $W0,$W0,@MSG[0]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) vmov $abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) sha256h $ABCD,$EFGH,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) sha256h2 $EFGH,$abcd,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) vld1.32 {$W0},[$Ktbl]!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) vadd.i32 $W1,$W1,@MSG[1]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) vmov $abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) sha256h $ABCD,$EFGH,$W1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) sha256h2 $EFGH,$abcd,$W1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) vld1.32 {$W1},[$Ktbl]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) vadd.i32 $W0,$W0,@MSG[2]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) sub $Ktbl,$Ktbl,#256-16 @ rewind
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) vmov $abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) sha256h $ABCD,$EFGH,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) sha256h2 $EFGH,$abcd,$W0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) vadd.i32 $W1,$W1,@MSG[3]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) vmov $abcd,$ABCD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) sha256h $ABCD,$EFGH,$W1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) sha256h2 $EFGH,$abcd,$W1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) vadd.i32 $ABCD,$ABCD,$ABCD_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) vadd.i32 $EFGH,$EFGH,$EFGH_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) it ne
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) bne .Loop_v8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) vst1.32 {$ABCD,$EFGH},[$ctx]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) ret @ bx lr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) .size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) }}}
# Trailer appended after all code paths: an identification string (the "@"
# in the address is escaped so Perl does not interpolate an array) and, for
# non-kernel builds with __ARM_MAX_ARCH__>=7, a .comm declaration of
# OPENSSL_armcap_P with the arguments 4,4.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) .asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) .align 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) .comm OPENSSL_armcap_P,4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681)
# Copy this script's own leading comment block to the output as assembler
# comments: the "#!" line is skipped, a leading "#" is turned into "@",
# blank lines pass through, and copying stops at the first other line.
# The three-argument open keeps $0 from being interpreted as anything but a
# plain file name.  Copying the header is best-effort: a failed open is
# reported on STDERR and the loop below then simply reads no lines.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) open(SELF,"<",$0) or warn "$0: cannot reopen script to copy its header: $!\n";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) while(<SELF>) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) next if (/^#!/);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) last if (!s/^#/@/ and !/^$/);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) print;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) close SELF;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689)
# unsha256(MNEMONIC, ARGS): hand-encode one ARMv8 SHA-256 instruction for
# assemblers that do not know the mnemonics.
#   MNEMONIC  one of the keys of %opcode
#   ARGS      operand text, either "qD,qM" or "qD,qN,qM"
# Returns an INST(b0,b1,b2,b3) macro call carrying the four opcode bytes,
# least significant first, followed by the original instruction as an
# assembler comment.  If the mnemonic is not in %opcode or the operands do
# not parse, the instruction text is returned unencoded so that the
# assembler gets to diagnose it instead of the instruction vanishing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) { my %opcode = (
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) "sha256h" => 0xf3000c40, "sha256h2" => 0xf3100c40,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) "sha256su0" => 0xf3ba03c0, "sha256su1" => 0xf3200c40 );
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) sub unsha256 {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) my ($mnemonic,$arg)=@_;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) if (exists $opcode{$mnemonic} and $arg =~ m/q([0-9]+)(?:,\s*q([0-9]+))?,\s*q([0-9]+)/o) {
# Merge the register numbers into the base opcode: the low three bits and
# bit 3 of each number go to separate positions.  $2 is undefined for the
# two-operand form and then contributes 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) my $word = $opcode{$mnemonic}|(($1&7)<<13)|(($1&8)<<19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) |(($2&7)<<17)|(($2&8)<<4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) |(($3&7)<<1) |(($3&8)<<2);
# Emit the word as individual bytes, least significant first,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) # since ARMv7 instructions are always encoded little-endian.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) # correct solution is to use .inst directive, but older
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) # assemblers don't implement it:-(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) return sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) $word&0xff,($word>>8)&0xff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) ($word>>16)&0xff,($word>>24)&0xff,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) $mnemonic,$arg;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) }
# Not encodable: hand the instruction back as text rather than returning
# nothing, which would make the caller delete it from the output.
return "$mnemonic\t$arg";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711)
# Post-process the accumulated assembler text line by line and print it:
#   1. any span enclosed in backticks is evaluated as Perl and replaced by
#      the result;
#   2. a sha256* instruction whose operands start with a q register is
#      replaced by whatever unsha256 returns for it;
#   3. "ret" is rewritten to "bx lr"; only when a line contained no "ret"
#      is a literal "bx lr" replaced by the raw opcode word, so a freshly
#      rewritten "ret" keeps its mnemonic form.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) foreach (split($/,$code)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) s/\`([^\`]*)\`/eval $1/geo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) s/\b(sha256\w+)\s+(q.*)/unsha256($1,$2)/geo;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) s/\bret\b/bx lr/go or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) s/\bbx\s+lr\b/.word\t0xe12fff1e/go; # make it possible to compile with -march=armv4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) print $_,"\n";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723)
# Closing STDOUT flushes whatever is still buffered.  The result is checked
# so that a failed write (for example a full disk) aborts the build instead
# of leaving a truncated assembler file behind.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) close STDOUT or die "error closing STDOUT: $!"; # enforce flush