^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) #!/usr/bin/env perl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) # SPDX-License-Identifier: GPL-2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) # This code is taken from the OpenSSL project but the author (Andy Polyakov)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) # has relicensed it under the GPLv2. Therefore this program is free software;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) # you can redistribute it and/or modify it under the terms of the GNU General
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) # Public License version 2 as published by the Free Software Foundation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) # The original headers, including the original license headers, are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) # included below for completeness.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) # ====================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) # project. The module is, however, dual licensed under OpenSSL and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) # CRYPTOGAMS licenses depending on where you obtain it. For further
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) # details see https://www.openssl.org/~appro/cryptogams/.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) # ====================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) # GHASH for PowerISA v2.07.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) # July 2014
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) # Accurate performance measurements are problematic, because it's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) # always a virtualized setup with a possibly throttled processor.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) # Relative comparison is therefore more informative. This initial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) # version is ~2.1x slower than hardware-assisted AES-128-CTR, ~12x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) # faster than "4-bit" integer-only compiler-generated 64-bit code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) # "Initial version" means that there is room for further improvement.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) $flavour=shift; # 1st command-line argument: target flavour, must contain "64" or "32"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) $output =shift; # 2nd command-line argument: handed on to ppc-xlate.pl unchanged
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) if ($flavour =~ /64/) { # 64-bit flavour; the settings below are not referenced by the code visible in this script
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) $SIZE_T=8; # presumably the pointer/size_t width in bytes - TODO confirm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) $LRSAVE=2*$SIZE_T; # presumably the link-register save offset in the frame - TODO confirm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) $STU="stdu"; # doubleword store-with-update mnemonic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) $POP="ld"; # doubleword load mnemonic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) $PUSH="std"; # doubleword store mnemonic
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) } elsif ($flavour =~ /32/) { # 32-bit flavour: same settings with word-sized values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) $SIZE_T=4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) $LRSAVE=$SIZE_T;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) $STU="stwu";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) $POP="lwz";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) $PUSH="stw";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) } else { die "nonsense $flavour"; } # flavour names neither width: abort before generating anything
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; # directory part of this script's path, trailing separator included
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) ( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or # 1st choice: translator next to this script
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) ( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or # fallback: ../../perlasm/ relative to this script
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) die "can't locate ppc-xlate.pl";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) open STDOUT,"| $^X $xlate $flavour $output" or die "can't call $xlate: $!"; # pipe everything printed below through the translator; "or", not "||", because "||" binds to the always-true command string and the die could never run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) my ($Xip,$Htbl,$inp,$len)=map("r$_",(3..6)); # argument block: r3..r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) my ($Xl,$Xm,$Xh,$IN)=map("v$_",(0..3)); # v0..v3: the three vpmsumd results and the block being multiplied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) my ($zero,$t0,$t1,$t2,$xC2,$H,$Hh,$Hl,$lemask)=map("v$_",(4..12)); # v4..v12: zero, temporaries, 0xc2 constant, table entries, LE byte-swap mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) my $vrsave="r12"; # GPR that keeps the entry value of SPR 256 (mfspr on entry, mtspr before blr in the template)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) $code=<<___; # assembly template for .gcm_init_p8, .gcm_gmult_p8 and .gcm_ghash_p8; $-names interpolate to the register strings defined above, le?-tagged lines are resolved by the foreach loop after the heredoc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) .machine "any"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) .globl .gcm_init_p8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) lis r0,0xfff0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) li r8,0x10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) mfspr $vrsave,256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) li r9,0x20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) mtspr 256,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) li r10,0x30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) lvx_u $H,0,r4 # load H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) le?xor r7,r7,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) le?addi r7,r7,0x8 # need a vperm start with 08
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) le?lvsr 5,0,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) le?vspltisb 6,0x0f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) le?vxor 5,5,6 # set a b-endian mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) le?vperm $H,$H,$H,5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) vspltisb $xC2,-16 # 0xf0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) vspltisb $t0,1 # one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) vaddubm $xC2,$xC2,$xC2 # 0xe0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) vxor $zero,$zero,$zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) vor $xC2,$xC2,$t0 # 0xe1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) vsldoi $xC2,$xC2,$zero,15 # 0xe1...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) vsldoi $t1,$zero,$t0,1 # ...1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) vaddubm $xC2,$xC2,$xC2 # 0xc2...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) vspltisb $t2,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) vor $xC2,$xC2,$t1 # 0xc2....01
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) vspltb $t1,$H,0 # most significant byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) vsl $H,$H,$t0 # H<<=1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) vsrab $t1,$t1,$t2 # broadcast carry bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) vand $t1,$t1,$xC2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) vxor $H,$H,$t1 # twisted H
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) vsldoi $H,$H,$H,8 # twist even more ...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) vsldoi $xC2,$zero,$xC2,8 # 0xc2.0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) vsldoi $Hl,$zero,$H,8 # ... and split
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) vsldoi $Hh,$H,$zero,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) stvx_u $xC2,0,r3 # save pre-computed table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) stvx_u $Hl,r8,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) stvx_u $H, r9,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) stvx_u $Hh,r10,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) mtspr 256,$vrsave
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) .long 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) .byte 0,12,0x14,0,0,0,2,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) .long 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) .size .gcm_init_p8,.-.gcm_init_p8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) .globl .gcm_gmult_p8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) lis r0,0xfff8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) li r8,0x10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) mfspr $vrsave,256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) li r9,0x20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) mtspr 256,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) li r10,0x30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) lvx_u $IN,0,$Xip # load Xi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) lvx_u $Hl,r8,$Htbl # load pre-computed table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) le?lvsl $lemask,r0,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) lvx_u $H, r9,$Htbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) le?vspltisb $t0,0x07
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) lvx_u $Hh,r10,$Htbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) le?vxor $lemask,$lemask,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) lvx_u $xC2,0,$Htbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) le?vperm $IN,$IN,$IN,$lemask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) vxor $zero,$zero,$zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) vpmsumd $t2,$Xl,$xC2 # 1st phase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) vsldoi $t0,$Xm,$zero,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) vsldoi $t1,$zero,$Xm,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) vxor $Xl,$Xl,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) vxor $Xh,$Xh,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) vsldoi $Xl,$Xl,$Xl,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) vxor $Xl,$Xl,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) vsldoi $t1,$Xl,$Xl,8 # 2nd phase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) vpmsumd $Xl,$Xl,$xC2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) vxor $t1,$t1,$Xh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) vxor $Xl,$Xl,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) le?vperm $Xl,$Xl,$Xl,$lemask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) stvx_u $Xl,0,$Xip # write out Xi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) mtspr 256,$vrsave
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) .long 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) .byte 0,12,0x14,0,0,0,2,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) .long 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) .size .gcm_gmult_p8,.-.gcm_gmult_p8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) .globl .gcm_ghash_p8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) lis r0,0xfff8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) li r8,0x10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) mfspr $vrsave,256
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) li r9,0x20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) mtspr 256,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) li r10,0x30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) lvx_u $Xl,0,$Xip # load Xi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) lvx_u $Hl,r8,$Htbl # load pre-computed table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) le?lvsl $lemask,r0,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) lvx_u $H, r9,$Htbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) le?vspltisb $t0,0x07
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) lvx_u $Hh,r10,$Htbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) le?vxor $lemask,$lemask,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) lvx_u $xC2,0,$Htbl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) le?vperm $Xl,$Xl,$Xl,$lemask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) vxor $zero,$zero,$zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) lvx_u $IN,0,$inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) addi $inp,$inp,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) subi $len,$len,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) le?vperm $IN,$IN,$IN,$lemask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) vxor $IN,$IN,$Xl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) b Loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) Loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) subic $len,$len,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) subfe. r0,r0,r0 # borrow?-1:0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) and r0,r0,$len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) add $inp,$inp,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) vpmsumd $t2,$Xl,$xC2 # 1st phase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) vsldoi $t0,$Xm,$zero,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) vsldoi $t1,$zero,$Xm,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) vxor $Xl,$Xl,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) vxor $Xh,$Xh,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) vsldoi $Xl,$Xl,$Xl,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) vxor $Xl,$Xl,$t2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) lvx_u $IN,0,$inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) addi $inp,$inp,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) vsldoi $t1,$Xl,$Xl,8 # 2nd phase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) vpmsumd $Xl,$Xl,$xC2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) le?vperm $IN,$IN,$IN,$lemask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) vxor $t1,$t1,$Xh
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) vxor $IN,$IN,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) vxor $IN,$IN,$Xl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) beq Loop # did $len-=16 borrow?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) vxor $Xl,$Xl,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) le?vperm $Xl,$Xl,$Xl,$lemask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) stvx_u $Xl,0,$Xip # write out Xi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) mtspr 256,$vrsave
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) .long 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) .byte 0,12,0x14,0,0,0,4,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) .long 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) .size .gcm_ghash_p8,.-.gcm_ghash_p8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) .asciz "GHASH for PowerISA 2.07, CRYPTOGAMS by <appro\@openssl.org>"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) .align 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) foreach (split("\n",$code)) { # post-process the template one line at a time and print it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) if ($flavour =~ /le$/o) { # little-endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) s/le\?//o or # strip the le? tag so the instruction is emitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) s/be\?/#be#/o; # turn a be? tag into a #be# marker
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) } else { # any flavour not ending in "le"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) s/le\?/#le#/o or # turn an le? tag into a #le# marker
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) s/be\?//o; # strip the be? tag so the instruction is emitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) print $_,"\n"; # goes to STDOUT, which was reopened as the pipe to ppc-xlate.pl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) close STDOUT or die "error closing STDOUT: $! (status $?)"; # enforce flush; closing a pipe also waits for the child, so a write error or a non-zero translator exit must not pass silently