^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) #!/usr/bin/env perl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) # SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) # ====================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) # Written by Andy Polyakov, @dot-asm, originally for the OpenSSL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) # project.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) # ====================================================================
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) # Poly1305 hash for MIPS.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) # May 2016
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) # Numbers are cycles per processed byte with poly1305_blocks alone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) # IALU/gcc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) # R1x000 ~5.5/+130% (big-endian)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) # Octeon II 2.50/+70% (little-endian)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) # March 2019
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) # Add 32-bit code path.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) # October 2019
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) # Modulo-scheduling the reduction allows omitting the dependency chain at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) # the end of the inner loop and improves performance. Also optimize MIPS32R2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) # code path for MIPS 1004K core. Per René von Dorst's suggestions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) # IALU/gcc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) # R1x000 ~9.8/? (big-endian)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) # Octeon II 3.65/+140% (little-endian)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) # MT7621/1004K 4.75/? (little-endian)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) ######################################################################
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) # There are a number of MIPS ABIs in use; O32 and N32/64 are the most
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) # widely used. Then there is a new contender: NUBI. It appears that if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) # one picks the latter, it's possible to arrange code in an ABI-neutral
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) # manner. Therefore let's stick to the NUBI register layout:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) ($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); # $zero=$0, $at=$1, $t0=$2, $t1=$24, $t2=$25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); # $a0..$a7 = $4..$11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); # $s0..$s11 = $12..$23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) ($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); # $gp=$3, $tp..$ra = $28..$31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) # The return value is placed in $a0. The following coding rules facilitate
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) # interoperability:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) # - never ever touch $tp, "thread pointer", former $gp [o32 can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) # excluded from the rule, because it's specified volatile];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) # - copy return value to $t0, former $v0 [or to $a0 if you're adapting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) # old code];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) # - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) # For reference here is register layout for N32/64 MIPS ABIs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) # ($zero,$at,$v0,$v1)=map("\$$_",(0..3));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) # ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) # ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) # ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) # ($gp,$sp,$fp,$ra)=map("\$$_",(28..31));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) # <appro@openssl.org>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) ######################################################################
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) $flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) $v0 = ($flavour =~ /nubi/i) ? $a0 : $t0; # return-value register: $a0 ($4) for NUBI flavours, $t0 ($2) otherwise
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) if ($flavour =~ /64|n32/i) {{{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) ######################################################################
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) # 64-bit code path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); # the four argument registers $a0-$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); # loaded 64-bit words and scratch temporaries
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
# Emit the preprocessor prologue followed by poly1305_init.
#
# Prologue: treats MIPS64R3/R5/R6 as a superset of MIPS64R2; wraps
# dmultu/mflo/mfhi in macros so that R6 builds use the three-operand
# dmulu/dmuhu forms instead; renames the entry points to *_mips under
# __KERNEL__; and defines the MSB/LSB byte offsets used with ldl/ldr
# according to endianness.
#
# poly1305_init, as generated: zeroes the three 64-bit hash words at
# ctx+0/8/16. When inp is non-zero it loads 16 key bytes (via an
# aligned-load-and-shift sequence on R6, ldl/ldr otherwise), byte-swaps
# them under MIPSEB, masks the two words with 0x0ffffffc0fffffff and
# 0x0ffffffc0ffffffc, and stores r0 at ctx+24, r1 at ctx+32 and
# r1 + (r1 >> 2) at ctx+40. It always returns 0 in $v0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) #if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) defined(_MIPS_ARCH_MIPS64R6)) \\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) && !defined(_MIPS_ARCH_MIPS64R2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) # define _MIPS_ARCH_MIPS64R2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) #if defined(_MIPS_ARCH_MIPS64R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) # define dmultu(rs,rt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) # define mflo(rd,rs,rt) dmulu rd,rs,rt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) # define mfhi(rd,rs,rt) dmuhu rd,rs,rt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) # define dmultu(rs,rt) dmultu rs,rt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) # define mflo(rd,rs,rt) mflo rd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) # define mfhi(rd,rs,rt) mfhi rd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) #ifdef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) # define poly1305_init poly1305_init_mips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) # define poly1305_blocks poly1305_blocks_mips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) # define poly1305_emit poly1305_emit_mips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) #if defined(__MIPSEB__) && !defined(MIPSEB)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) # define MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) #ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) # define MSB 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) # define LSB 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) # define MSB 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) # define LSB 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) .set noat
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) .set noreorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) .globl poly1305_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) .ent poly1305_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) poly1305_init:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) .frame $sp,0,$ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) .set reorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) sd $zero,0($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) sd $zero,8($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) sd $zero,16($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) beqz $inp,.Lno_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) #if defined(_MIPS_ARCH_MIPS64R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) andi $tmp0,$inp,7 # $inp % 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) dsubu $inp,$inp,$tmp0 # align $inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) sll $tmp0,$tmp0,3 # byte to bit offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) ld $in0,0($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ld $in1,8($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) beqz $tmp0,.Laligned_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) ld $tmp2,16($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) subu $tmp1,$zero,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) # ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) dsllv $in0,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) dsrlv $tmp3,$in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) dsllv $in1,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) dsrlv $tmp2,$tmp2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) dsrlv $in0,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) dsllv $tmp3,$in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) dsrlv $in1,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) dsllv $tmp2,$tmp2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) or $in0,$in0,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) or $in1,$in1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) .Laligned_key:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) ldl $in0,0+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) ldl $in1,8+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) ldr $in0,0+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) ldr $in1,8+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) #ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) # if defined(_MIPS_ARCH_MIPS64R2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) dsbh $in0,$in0 # byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) dsbh $in1,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) dshd $in0,$in0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) dshd $in1,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) ori $tmp0,$zero,0xFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) dsll $tmp2,$tmp0,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) or $tmp0,$tmp2 # 0x000000FF000000FF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) and $tmp1,$in0,$tmp0 # byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) and $tmp3,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) dsrl $tmp2,$in0,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) dsrl $tmp4,$in1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) dsll $tmp1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) dsll $tmp3,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) and $tmp2,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) and $tmp4,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) dsll $tmp0,8 # 0x0000FF000000FF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) or $tmp1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) or $tmp3,$tmp4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) and $tmp2,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) and $tmp4,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) dsrl $in0,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) dsrl $in1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) dsll $tmp2,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) dsll $tmp4,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) and $in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) and $in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) or $tmp1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) or $tmp3,$tmp4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) or $in0,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) or $in1,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) dsrl $tmp1,$in0,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) dsrl $tmp3,$in1,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) dsll $in0,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) dsll $in1,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) or $in0,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) or $in1,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) li $tmp0,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) dsll $tmp0,32 # 0x0000000100000000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) daddiu $tmp0,-63 # 0x00000000ffffffc1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) dsll $tmp0,28 # 0x0ffffffc10000000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) daddiu $tmp0,-1 # 0x0ffffffc0fffffff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) and $in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) daddiu $tmp0,-3 # 0x0ffffffc0ffffffc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) and $in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) sd $in0,24($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) dsrl $tmp0,$in1,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) sd $in1,32($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) sd $tmp0,40($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) .Lno_key:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) li $v0,0 # return 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) jr $ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) .end poly1305_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) {
# Emits poly1305_blocks and poly1305_blocks_internal for the 64-bit path.
#
# poly1305_blocks shifts len right by 4 and returns at once when no
# complete 16-byte block remains; otherwise it branches to
# poly1305_blocks_internal. That routine loads h0-h2 from ctx+0/8/16 and
# r0, r1, s1 from ctx+24/32/40, consumes the input 16 bytes per loop
# iteration (adding padbit into the top limb each time), and stores the
# updated h0-h2 back to ctx+0/8/16 before returning.
#
# .mask bitmap of the saved registers: 0x0003f000 covers $12-$17
# ($s0-$s5) for NUBI flavours, 0x00030000 only $16-$17 ($s4-$s5)
# otherwise, matching the conditional sd/ld sequences below. R6 builds OR
# in 0x000c0000 for $18-$19 ($s6-$s7), which hold the shift counts.
# NOTE(review): skipping $s0-$s3 on non-NUBI presumably relies on $12-$15
# being temporaries in the N32/64 layout -- confirm against the ABI.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x0003f000" : "0x00030000";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
# h0-h2: hash limbs; r0/r1: key words; rs1: the r1 + (r1 >> 2) value
# stored at ctx+40 by poly1305_init; d0-d2: hash-plus-input sums fed to
# the multiplications (d0/d1 reuse the $in0/$in1 registers).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) my ($shr,$shl) = ($s6,$s7); # used on R6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) .globl poly1305_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) .ent poly1305_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) poly1305_blocks:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) .set noreorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) dsrl $len,4 # number of complete blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) bnez $len,poly1305_blocks_internal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) jr $ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) .end poly1305_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) .ent poly1305_blocks_internal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) poly1305_blocks_internal:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) .set noreorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) #if defined(_MIPS_ARCH_MIPS64R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) .frame $sp,8*8,$ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) .mask $SAVED_REGS_MASK|0x000c0000,-8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) dsubu $sp,8*8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) sd $s7,56($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) sd $s6,48($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) .frame $sp,6*8,$ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) .mask $SAVED_REGS_MASK,-8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) dsubu $sp,6*8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) sd $s5,40($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) sd $s4,32($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) sd $s3,24($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) sd $s2,16($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) sd $s1,8($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) sd $s0,0($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) ___
# Main body: optional R6 misalignment setup, state load, the per-block
# loop, the state store, and the first part of the epilogue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) .set reorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) #if defined(_MIPS_ARCH_MIPS64R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) andi $shr,$inp,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) dsubu $inp,$inp,$shr # align $inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) sll $shr,$shr,3 # byte to bit offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) subu $shl,$zero,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) ld $h0,0($ctx) # load hash value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) ld $h1,8($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) ld $h2,16($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) ld $r0,24($ctx) # load key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) ld $r1,32($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) ld $rs1,40($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) dsll $len,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) daddu $len,$inp # end of buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) b .Loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) .Loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) #if defined(_MIPS_ARCH_MIPS64R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) ld $in0,0($inp) # load input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) ld $in1,8($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) beqz $shr,.Laligned_inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) ld $tmp2,16($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) # ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) dsllv $in0,$in0,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) dsrlv $tmp3,$in1,$shl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) dsllv $in1,$in1,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) dsrlv $tmp2,$tmp2,$shl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) dsrlv $in0,$in0,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) dsllv $tmp3,$in1,$shl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) dsrlv $in1,$in1,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) dsllv $tmp2,$tmp2,$shl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) or $in0,$in0,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) or $in1,$in1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) .Laligned_inp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) ldl $in0,0+MSB($inp) # load input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) ldl $in1,8+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) ldr $in0,0+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) ldr $in1,8+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) daddiu $inp,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) #ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) # if defined(_MIPS_ARCH_MIPS64R2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) dsbh $in0,$in0 # byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) dsbh $in1,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) dshd $in0,$in0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) dshd $in1,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) ori $tmp0,$zero,0xFF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) dsll $tmp2,$tmp0,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) or $tmp0,$tmp2 # 0x000000FF000000FF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) and $tmp1,$in0,$tmp0 # byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) and $tmp3,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) dsrl $tmp2,$in0,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) dsrl $tmp4,$in1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) dsll $tmp1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) dsll $tmp3,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) and $tmp2,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) and $tmp4,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) dsll $tmp0,8 # 0x0000FF000000FF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) or $tmp1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) or $tmp3,$tmp4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) and $tmp2,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) and $tmp4,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) dsrl $in0,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) dsrl $in1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) dsll $tmp2,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) dsll $tmp4,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) and $in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) and $in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) or $tmp1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) or $tmp3,$tmp4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) or $in0,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) or $in1,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) dsrl $tmp1,$in0,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) dsrl $tmp3,$in1,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) dsll $in0,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) dsll $in1,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) or $in0,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) or $in1,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) dsrl $tmp1,$h2,2 # modulo-scheduled reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) andi $h2,$h2,3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) dsll $tmp0,$tmp1,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) daddu $d0,$h0,$in0 # accumulate input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) daddu $tmp1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) sltu $tmp0,$d0,$h0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) daddu $d0,$d0,$tmp1 # ... and residue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) sltu $tmp1,$d0,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) daddu $d1,$h1,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) daddu $tmp0,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) sltu $tmp1,$d1,$h1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) daddu $d1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) dmultu ($r0,$d0) # h0*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) daddu $d2,$h2,$padbit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) sltu $tmp0,$d1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) mflo ($h0,$r0,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) mfhi ($h1,$r0,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) dmultu ($rs1,$d1) # h1*5*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) daddu $d2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) daddu $d2,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) mflo ($tmp0,$rs1,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) mfhi ($tmp1,$rs1,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) dmultu ($r1,$d0) # h0*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) mflo ($tmp2,$r1,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) mfhi ($h2,$r1,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) daddu $h0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) daddu $h1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) sltu $tmp0,$h0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) dmultu ($r0,$d1) # h1*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) daddu $h1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) daddu $h1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) mflo ($tmp0,$r0,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) mfhi ($tmp1,$r0,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) dmultu ($rs1,$d2) # h2*5*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) sltu $tmp2,$h1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) daddu $h2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) mflo ($tmp2,$rs1,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) dmultu ($r0,$d2) # h2*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) daddu $h1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) daddu $h2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) mflo ($tmp3,$r0,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) sltu $tmp0,$h1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) daddu $h2,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) daddu $h1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) sltu $tmp2,$h1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) daddu $h2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) daddu $h2,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) bne $inp,$len,.Loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) sd $h0,0($ctx) # store hash value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) sd $h1,8($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) sd $h2,16($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) .set noreorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) #if defined(_MIPS_ARCH_MIPS64R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) ld $s7,56($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) ld $s6,48($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) ld $s5,40($sp) # epilogue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) ld $s4,32($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) ld $s3,24($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) ld $s2,16($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) ld $s1,8($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) ld $s0,0($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) ___
# Return. ".set noreorder" is still in effect here, so the instruction
# written after "jr" (the stack-pointer restore) occupies its delay slot;
# the frame size mirrors the one chosen in the prologue.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) jr $ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) #if defined(_MIPS_ARCH_MIPS64R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) daddu $sp,8*8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) daddu $sp,6*8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) .end poly1305_blocks_internal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) my ($ctx,$mac,$nonce) = ($a0,$a1,$a2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) .globl poly1305_emit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) .ent poly1305_emit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) poly1305_emit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) .frame $sp,0,$ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) .set reorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) ld $tmp2,16($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) ld $tmp0,0($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) ld $tmp1,8($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) li $in0,-4 # final reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) dsrl $in1,$tmp2,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) and $in0,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) andi $tmp2,$tmp2,3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) daddu $in0,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) daddu $tmp0,$tmp0,$in0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) sltu $in1,$tmp0,$in0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) daddiu $in0,$tmp0,5 # compare to modulus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) daddu $tmp1,$tmp1,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) sltiu $tmp3,$in0,5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) sltu $tmp4,$tmp1,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) daddu $in1,$tmp1,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) daddu $tmp2,$tmp2,$tmp4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) sltu $tmp3,$in1,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) daddu $tmp2,$tmp2,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) dsrl $tmp2,2 # see if it carried/borrowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) dsubu $tmp2,$zero,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) xor $in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) xor $in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) and $in0,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) and $in1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) xor $in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) xor $in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) lwu $tmp0,0($nonce) # load nonce
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) lwu $tmp1,4($nonce)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) lwu $tmp2,8($nonce)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) lwu $tmp3,12($nonce)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) dsll $tmp1,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) dsll $tmp3,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) or $tmp0,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) or $tmp2,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) daddu $in0,$tmp0 # accumulate nonce
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) daddu $in1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) sltu $tmp0,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) daddu $in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) dsrl $tmp0,$in0,8 # write mac value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) dsrl $tmp1,$in0,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) dsrl $tmp2,$in0,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) sb $in0,0($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) dsrl $tmp3,$in0,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) sb $tmp0,1($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) dsrl $tmp0,$in0,40
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) sb $tmp1,2($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) dsrl $tmp1,$in0,48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) sb $tmp2,3($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) dsrl $tmp2,$in0,56
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) sb $tmp3,4($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) dsrl $tmp3,$in1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) sb $tmp0,5($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) dsrl $tmp0,$in1,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) sb $tmp1,6($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) dsrl $tmp1,$in1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) sb $tmp2,7($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) sb $in1,8($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) dsrl $tmp2,$in1,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) sb $tmp3,9($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) dsrl $tmp3,$in1,40
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) sb $tmp0,10($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) dsrl $tmp0,$in1,48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) sb $tmp1,11($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) dsrl $tmp1,$in1,56
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) sb $tmp2,12($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) sb $tmp3,13($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) sb $tmp0,14($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) sb $tmp1,15($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) jr $ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) .end poly1305_emit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) .rdata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) .asciiz "Poly1305 for MIPS64, CRYPTOGAMS by \@dot-asm"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) .align 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) }}} else {{{
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) ######################################################################
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) # 32-bit code path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545)
# Register aliases for the 32-bit code path (the $a*/$t*/$at values come from
# outside this section).
#   $ctx,$inp,$len,$padbit - bound to $a0..$a3; poly1305_init below uses only
#                            $ctx and $inp, poly1305_blocks uses all four.
#   $in0..$in3             - bound to $a4..$a7; hold the four 32-bit key words
#                            while poly1305_init processes them.
#   $tmp0..$tmp3           - scratch registers.  $tmp0 is bound to $at, and the
#                            emitted code is assembled under ".set noat".
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549)
# Append the 32-bit preamble and the poly1305_init routine to $code.
#
# Preamble (C-preprocessor text that is passed through to the output):
#   - _MIPS_ARCH_MIPS32R3/R5/R6 imply _MIPS_ARCH_MIPS32R2 when it is not
#     already defined;
#   - multu()/mflo()/mfhi() wrappers: on R6 multu() expands to nothing and
#     mflo()/mfhi() become the three-operand mulu/muhu instructions, otherwise
#     they expand to the plain multu/mflo/mfhi instructions;
#   - under __KERNEL__ the three entry points are renamed with a _mips suffix;
#   - MIPSEB is derived from __MIPSEB__, and MSB/LSB are defined as byte
#     offsets (0/3 when MIPSEB, 3/0 otherwise) used by the lwl/lwr loads.
#
# poly1305_init, with $ctx and $inp as its inputs:
#   - stores zero to the five words at $ctx+0 .. $ctx+16 (poly1305_blocks
#     loads these as the hash value);
#   - if $inp is zero, branches to .Lno_key and skips key setup;
#   - otherwise loads 16 bytes from $inp without assuming word alignment:
#     on R6 by aligning $inp down, loading aligned words (plus a fifth word
#     when misaligned) and merging with variable shifts; elsewhere with
#     lwl/lwr pairs;
#   - on MIPSEB builds byte-swaps each of the four words (wsbh+rotr on R2,
#     shift/mask/or sequence otherwise);
#   - masks word 0 with 0x0fffffff and words 1..3 with 0x0ffffffc;
#   - stores the masked words at $ctx+20 .. $ctx+32, and for words 1..3 also
#     stores word + (word >> 2) at $ctx+36 .. $ctx+44;
#   - sets $v0 to 0 and returns via $ra on both paths.
#
# Everything between the heredoc markers is emitted text; do not reformat it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) #if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) defined(_MIPS_ARCH_MIPS32R6)) \\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) && !defined(_MIPS_ARCH_MIPS32R2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) # define _MIPS_ARCH_MIPS32R2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) #if defined(_MIPS_ARCH_MIPS32R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) # define multu(rs,rt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) # define mflo(rd,rs,rt) mulu rd,rs,rt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) # define mfhi(rd,rs,rt) muhu rd,rs,rt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) # define multu(rs,rt) multu rs,rt
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) # define mflo(rd,rs,rt) mflo rd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) # define mfhi(rd,rs,rt) mfhi rd
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) #ifdef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) # define poly1305_init poly1305_init_mips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) # define poly1305_blocks poly1305_blocks_mips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) # define poly1305_emit poly1305_emit_mips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) #if defined(__MIPSEB__) && !defined(MIPSEB)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) # define MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) #ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) # define MSB 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) # define LSB 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) # define MSB 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) # define LSB 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) .set noat
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) .set noreorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) .globl poly1305_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) .ent poly1305_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) poly1305_init:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) .frame $sp,0,$ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) .set reorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) sw $zero,0($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) sw $zero,4($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) sw $zero,8($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) sw $zero,12($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) sw $zero,16($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) beqz $inp,.Lno_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) #if defined(_MIPS_ARCH_MIPS32R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) andi $tmp0,$inp,3 # $inp % 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) subu $inp,$inp,$tmp0 # align $inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) sll $tmp0,$tmp0,3 # byte to bit offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) lw $in0,0($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) lw $in1,4($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) lw $in2,8($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) lw $in3,12($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) beqz $tmp0,.Laligned_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) lw $tmp2,16($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) subu $tmp1,$zero,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) # ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) sllv $in0,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) srlv $tmp3,$in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) sllv $in1,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) or $in0,$in0,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) srlv $tmp3,$in2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) sllv $in2,$in2,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) or $in1,$in1,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) srlv $tmp3,$in3,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) sllv $in3,$in3,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) or $in2,$in2,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) srlv $tmp2,$tmp2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) or $in3,$in3,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) srlv $in0,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) sllv $tmp3,$in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) srlv $in1,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) or $in0,$in0,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) sllv $tmp3,$in2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) srlv $in2,$in2,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) or $in1,$in1,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) sllv $tmp3,$in3,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) srlv $in3,$in3,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) or $in2,$in2,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) sllv $tmp2,$tmp2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) or $in3,$in3,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) .Laligned_key:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) lwl $in0,0+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) lwl $in1,4+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) lwl $in2,8+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) lwl $in3,12+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) lwr $in0,0+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) lwr $in1,4+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) lwr $in2,8+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) lwr $in3,12+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) #ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) # if defined(_MIPS_ARCH_MIPS32R2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) wsbh $in0,$in0 # byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) wsbh $in1,$in1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) wsbh $in2,$in2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) wsbh $in3,$in3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) rotr $in0,$in0,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) rotr $in1,$in1,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) rotr $in2,$in2,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) rotr $in3,$in3,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) srl $tmp0,$in0,24 # byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) srl $tmp1,$in0,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) andi $tmp2,$in0,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) sll $in0,$in0,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) andi $tmp1,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) sll $tmp2,$tmp2,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) or $in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) srl $tmp0,$in1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) or $tmp1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) srl $tmp2,$in1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) or $in0,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) andi $tmp1,$in1,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) sll $in1,$in1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) andi $tmp2,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) sll $tmp1,$tmp1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) or $in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) srl $tmp0,$in2,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) or $tmp2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) srl $tmp1,$in2,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) or $in1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) andi $tmp2,$in2,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) sll $in2,$in2,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) andi $tmp1,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) sll $tmp2,$tmp2,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) or $in2,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) srl $tmp0,$in3,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) or $tmp1,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) srl $tmp2,$in3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) or $in2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) andi $tmp1,$in3,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) sll $in3,$in3,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) andi $tmp2,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) sll $tmp1,$tmp1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) or $in3,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) or $tmp2,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) or $in3,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) lui $tmp0,0x0fff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) ori $tmp0,0xffff # 0x0fffffff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) and $in0,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) subu $tmp0,3 # 0x0ffffffc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) and $in1,$in1,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) and $in2,$in2,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) and $in3,$in3,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) sw $in0,20($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) sw $in1,24($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) sw $in2,28($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) sw $in3,32($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) srl $tmp1,$in1,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) srl $tmp2,$in2,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) srl $tmp3,$in3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) addu $in2,$in2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) addu $in3,$in3,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) sw $in1,36($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) sw $in2,40($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) sw $in3,44($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) .Lno_key:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) li $v0,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) jr $ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) .end poly1305_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? "0x00fff000" : "0x00ff0000";
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) my ($d0,$d1,$d2,$d3) =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) ($a4,$a5,$a6,$a7);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) my $shr = $t2; # used on R6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) my $one = $t2; # used on R2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) .globl poly1305_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) .ent poly1305_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) poly1305_blocks:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) .frame $sp,16*4,$ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) .mask $SAVED_REGS_MASK,-4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) .set noreorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) subu $sp, $sp,4*12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) sw $s11,4*11($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) sw $s10,4*10($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) sw $s9, 4*9($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) sw $s8, 4*8($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) sw $s7, 4*7($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) sw $s6, 4*6($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) sw $s5, 4*5($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) sw $s4, 4*4($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) sw $s3, 4*3($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) sw $s2, 4*2($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) sw $s1, 4*1($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) sw $s0, 4*0($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) .set reorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) srl $len,4 # number of complete blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) li $one,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) beqz $len,.Labort
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) #if defined(_MIPS_ARCH_MIPS32R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) andi $shr,$inp,3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) subu $inp,$inp,$shr # align $inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) sll $shr,$shr,3 # byte to bit offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) lw $h0,0($ctx) # load hash value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) lw $h1,4($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) lw $h2,8($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) lw $h3,12($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) lw $h4,16($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) lw $r0,20($ctx) # load key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) lw $r1,24($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) lw $r2,28($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) lw $r3,32($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) lw $rs1,36($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) lw $rs2,40($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) lw $rs3,44($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) sll $len,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) addu $len,$len,$inp # end of buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) b .Loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) .Loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) #if defined(_MIPS_ARCH_MIPS32R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) lw $d0,0($inp) # load input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) lw $d1,4($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) lw $d2,8($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) lw $d3,12($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) beqz $shr,.Laligned_inp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) lw $t0,16($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) subu $t1,$zero,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) # ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) sllv $d0,$d0,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) srlv $at,$d1,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) sllv $d1,$d1,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) or $d0,$d0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) srlv $at,$d2,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) sllv $d2,$d2,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) or $d1,$d1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) srlv $at,$d3,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) sllv $d3,$d3,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) or $d2,$d2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) srlv $t0,$t0,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) or $d3,$d3,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) srlv $d0,$d0,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) sllv $at,$d1,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) srlv $d1,$d1,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) or $d0,$d0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) sllv $at,$d2,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) srlv $d2,$d2,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) or $d1,$d1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) sllv $at,$d3,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) srlv $d3,$d3,$shr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) or $d2,$d2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) sllv $t0,$t0,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) or $d3,$d3,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) .Laligned_inp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) lwl $d0,0+MSB($inp) # load input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) lwl $d1,4+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) lwl $d2,8+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) lwl $d3,12+MSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) lwr $d0,0+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) lwr $d1,4+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) lwr $d2,8+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) lwr $d3,12+LSB($inp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) #ifdef MIPSEB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) # if defined(_MIPS_ARCH_MIPS32R2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) wsbh $d0,$d0 # byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) wsbh $d1,$d1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) wsbh $d2,$d2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) wsbh $d3,$d3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) rotr $d0,$d0,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) rotr $d1,$d1,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) rotr $d2,$d2,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) rotr $d3,$d3,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) # else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) srl $at,$d0,24 # byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) srl $t0,$d0,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) andi $t1,$d0,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) sll $d0,$d0,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) andi $t0,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) sll $t1,$t1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) or $d0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) srl $at,$d1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) or $t0,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) srl $t1,$d1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) or $d0,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) andi $t0,$d1,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) sll $d1,$d1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) andi $t1,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) sll $t0,$t0,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) or $d1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) srl $at,$d2,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) or $t1,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) srl $t0,$d2,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) or $d1,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) andi $t1,$d2,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) sll $d2,$d2,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) andi $t0,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) sll $t1,$t1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879) or $d2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) srl $at,$d3,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) or $t0,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) srl $t1,$d3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) or $d2,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) andi $t0,$d3,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) sll $d3,$d3,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) andi $t1,0xFF00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) sll $t0,$t0,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) or $d3,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) or $t1,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) or $d3,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) # endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) srl $t0,$h4,2 # modulo-scheduled reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) andi $h4,$h4,3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) sll $at,$t0,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) addu $d0,$d0,$h0 # accumulate input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) addu $t0,$t0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) sltu $h0,$d0,$h0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) addu $d0,$d0,$t0 # ... and residue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) sltu $at,$d0,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) addu $d1,$d1,$h1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) addu $h0,$h0,$at # carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) sltu $h1,$d1,$h1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) addu $d1,$d1,$h0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) sltu $h0,$d1,$h0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) addu $d2,$d2,$h2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) addu $h1,$h1,$h0 # carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) sltu $h2,$d2,$h2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) addu $d2,$d2,$h1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) sltu $h1,$d2,$h1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) addu $d3,$d3,$h3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) addu $h2,$h2,$h1 # carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) sltu $h3,$d3,$h3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) addu $d3,$d3,$h2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) #if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) multu $r0,$d0 # d0*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) sltu $h2,$d3,$h2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) maddu $rs3,$d1 # d1*s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) addu $h3,$h3,$h2 # carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) maddu $rs2,$d2 # d2*s2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) addu $h4,$h4,$padbit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) maddu $rs1,$d3 # d3*s1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) addu $h4,$h4,$h3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) mfhi $at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) mflo $h0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) multu $r1,$d0 # d0*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) maddu $r0,$d1 # d1*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) maddu $rs3,$d2 # d2*s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) maddu $rs2,$d3 # d3*s2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) maddu $rs1,$h4 # h4*s1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) maddu $at,$one # hi*1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) mfhi $at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) mflo $h1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) multu $r2,$d0 # d0*r2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) maddu $r1,$d1 # d1*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) maddu $r0,$d2 # d2*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) maddu $rs3,$d3 # d3*s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) maddu $rs2,$h4 # h4*s2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) maddu $at,$one # hi*1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) mfhi $at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) mflo $h2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) mul $t0,$r0,$h4 # h4*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) multu $r3,$d0 # d0*r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) maddu $r2,$d1 # d1*r2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) maddu $r1,$d2 # d2*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) maddu $r0,$d3 # d3*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) maddu $rs3,$h4 # h4*s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) maddu $at,$one # hi*1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) mfhi $at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) mflo $h3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) addiu $inp,$inp,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) addu $h4,$t0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) multu ($r0,$d0) # d0*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) mflo ($h0,$r0,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) mfhi ($h1,$r0,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) sltu $h2,$d3,$h2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) addu $h3,$h3,$h2 # carry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) multu ($rs3,$d1) # d1*s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) mflo ($at,$rs3,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) mfhi ($t0,$rs3,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) addu $h4,$h4,$padbit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) addiu $inp,$inp,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) addu $h4,$h4,$h3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) multu ($rs2,$d2) # d2*s2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) mflo ($a3,$rs2,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) mfhi ($t1,$rs2,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) addu $h0,$h0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) addu $h1,$h1,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) multu ($rs1,$d3) # d3*s1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) sltu $at,$h0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) addu $h1,$h1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) mflo ($at,$rs1,$d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) mfhi ($t0,$rs1,$d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) addu $h0,$h0,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) addu $h1,$h1,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) multu ($r1,$d0) # d0*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) sltu $a3,$h0,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) addu $h1,$h1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) mflo ($a3,$r1,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) mfhi ($h2,$r1,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) addu $h0,$h0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) addu $h1,$h1,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) multu ($r0,$d1) # d1*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) sltu $at,$h0,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) addu $h1,$h1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) mflo ($at,$r0,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) mfhi ($t0,$r0,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) addu $h1,$h1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) sltu $a3,$h1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) multu ($rs3,$d2) # d2*s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) addu $h2,$h2,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) mflo ($a3,$rs3,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) mfhi ($t1,$rs3,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) addu $h1,$h1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) addu $h2,$h2,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) multu ($rs2,$d3) # d3*s2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) sltu $at,$h1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) addu $h2,$h2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) mflo ($at,$rs2,$d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) mfhi ($t0,$rs2,$d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) addu $h1,$h1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) addu $h2,$h2,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) multu ($rs1,$h4) # h4*s1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) sltu $a3,$h1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) addu $h2,$h2,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) mflo ($a3,$rs1,$h4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) addu $h1,$h1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) addu $h2,$h2,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) multu ($r2,$d0) # d0*r2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) sltu $at,$h1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) addu $h2,$h2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) mflo ($at,$r2,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) mfhi ($h3,$r2,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) addu $h1,$h1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) sltu $a3,$h1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) multu ($r1,$d1) # d1*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) addu $h2,$h2,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) mflo ($a3,$r1,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) mfhi ($t1,$r1,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) addu $h2,$h2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) sltu $at,$h2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) multu ($r0,$d2) # d2*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) addu $h3,$h3,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) mflo ($at,$r0,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) mfhi ($t0,$r0,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) addu $h2,$h2,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) addu $h3,$h3,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) multu ($rs3,$d3) # d3*s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) sltu $a3,$h2,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) addu $h3,$h3,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) mflo ($a3,$rs3,$d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) mfhi ($t1,$rs3,$d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) addu $h2,$h2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) addu $h3,$h3,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) multu ($rs2,$h4) # h4*s2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) sltu $at,$h2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) addu $h3,$h3,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) mflo ($at,$rs2,$h4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) addu $h2,$h2,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) addu $h3,$h3,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) multu ($r3,$d0) # d0*r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) sltu $a3,$h2,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) addu $h3,$h3,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) mflo ($a3,$r3,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) mfhi ($t1,$r3,$d0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) addu $h2,$h2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) sltu $at,$h2,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) multu ($r2,$d1) # d1*r2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) addu $h3,$h3,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) mflo ($at,$r2,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) mfhi ($t0,$r2,$d1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) addu $h3,$h3,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) sltu $a3,$h3,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) multu ($r0,$d3) # d3*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) addu $t1,$t1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) mflo ($a3,$r0,$d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) mfhi ($d3,$r0,$d3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) addu $h3,$h3,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) addu $t1,$t1,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) multu ($r1,$d2) # d2*r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) sltu $at,$h3,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) addu $t1,$t1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) mflo ($at,$r1,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) mfhi ($t0,$r1,$d2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) addu $h3,$h3,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) addu $t1,$t1,$d3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) multu ($rs3,$h4) # h4*s3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) sltu $a3,$h3,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) addu $t1,$t1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) mflo ($a3,$rs3,$h4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) addu $h3,$h3,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) addu $t1,$t1,$t0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) multu ($r0,$h4) # h4*r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) sltu $at,$h3,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) addu $t1,$t1,$at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) mflo ($h4,$r0,$h4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) addu $h3,$h3,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) sltu $a3,$h3,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) addu $t1,$t1,$a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) addu $h4,$h4,$t1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) li $padbit,1 # if we loop, padbit is 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) bne $inp,$len,.Loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) sw $h0,0($ctx) # store hash value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) sw $h1,4($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) sw $h2,8($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) sw $h3,12($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) sw $h4,16($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) .set noreorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) .Labort:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) lw $s11,4*11($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) lw $s10,4*10($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) lw $s9, 4*9($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) lw $s8, 4*8($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) lw $s7, 4*7($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) lw $s6, 4*6($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) lw $s5, 4*5($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) lw $s4, 4*4($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) $code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) lw $s3, 4*3($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) lw $s2, 4*2($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) lw $s1, 4*1($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) lw $s0, 4*0($sp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) jr $ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) addu $sp,$sp,4*12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) .end poly1305_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) {
# Generator for poly1305_emit.
#
# Appends to $code the assembly text of the global function
# poly1305_emit. Register roles inside the generated function:
#   $ctx   ($a0) - base address the five 32-bit hash words are loaded
#                  from (offsets 0..16); reused as carry/mask scratch
#                  once they have been loaded
#   $mac   ($a1) - address the 16 result bytes are stored to
#   $nonce ($a2) - address the four 32-bit nonce words are loaded from
#   $tmp4  ($a3) - scratch holding the fifth (top) hash word
# $tmp0-$tmp3 and $in0-$in3 are not declared in this block; they come
# from the enclosing scope.
#
# Generated code, in order:
#   1. "final reduction": bits 2 and up of the top word are folded into
#      the low words as (top>>2) + (top & -4), i.e. 5*(top>>2), with the
#      carry rippled upwards; only the low two bits of the top word stay.
#   2. "compare to modulus": hash+5 is computed alongside the hash; the
#      remaining top bits plus the carry out of that sum, shifted right
#      by 2 and negated, form an all-zeros/all-ones mask, and the
#      xor/and/xor sequence selects hash+5 when the mask is set and the
#      unchanged hash otherwise, without branching.
#   3. The nonce is added as a 128-bit quantity; the carry out of the
#      top word is dropped.
#   4. The result is written with byte stores, least significant byte of
#      each word first.
# An identification string is placed in .rdata after the function.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) $code.=<<___;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) .globl poly1305_emit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) .ent poly1305_emit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) poly1305_emit:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) .frame $sp,0,$ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) .set reorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) lw $tmp4,16($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) lw $tmp0,0($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) lw $tmp1,4($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) lw $tmp2,8($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) lw $tmp3,12($ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) li $in0,-4 # final reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) srl $ctx,$tmp4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) and $in0,$in0,$tmp4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) andi $tmp4,$tmp4,3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) addu $ctx,$ctx,$in0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) addu $tmp0,$tmp0,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) sltu $ctx,$tmp0,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) addiu $in0,$tmp0,5 # compare to modulus
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) addu $tmp1,$tmp1,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) sltiu $in1,$in0,5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) sltu $ctx,$tmp1,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) addu $in1,$in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) addu $tmp2,$tmp2,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) sltu $in2,$in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) sltu $ctx,$tmp2,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) addu $in2,$in2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) addu $tmp3,$tmp3,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) sltu $in3,$in2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) sltu $ctx,$tmp3,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) addu $in3,$in3,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) addu $tmp4,$tmp4,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) sltu $ctx,$in3,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) addu $ctx,$tmp4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) srl $ctx,2 # see if it carried/borrowed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) subu $ctx,$zero,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) xor $in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) xor $in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) xor $in2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) xor $in3,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) and $in0,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) and $in1,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) and $in2,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) and $in3,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) xor $in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) xor $in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) xor $in2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) xor $in3,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) lw $tmp0,0($nonce) # load nonce
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) lw $tmp1,4($nonce)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) lw $tmp2,8($nonce)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) lw $tmp3,12($nonce)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) addu $in0,$tmp0 # accumulate nonce
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) sltu $ctx,$in0,$tmp0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) addu $in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) sltu $tmp1,$in1,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) addu $in1,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) sltu $ctx,$in1,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) addu $ctx,$tmp1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) addu $in2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) sltu $tmp2,$in2,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) addu $in2,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) sltu $ctx,$in2,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) addu $ctx,$tmp2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) addu $in3,$tmp3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) addu $in3,$ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) srl $tmp0,$in0,8 # write mac value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) srl $tmp1,$in0,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) srl $tmp2,$in0,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) sb $in0, 0($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) sb $tmp0,1($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) srl $tmp0,$in1,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) sb $tmp1,2($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) srl $tmp1,$in1,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) sb $tmp2,3($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) srl $tmp2,$in1,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) sb $in1, 4($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) sb $tmp0,5($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) srl $tmp0,$in2,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) sb $tmp1,6($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) srl $tmp1,$in2,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) sb $tmp2,7($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) srl $tmp2,$in2,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) sb $in2, 8($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) sb $tmp0,9($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) srl $tmp0,$in3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) sb $tmp1,10($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) srl $tmp1,$in3,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) sb $tmp2,11($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) srl $tmp2,$in3,24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) sb $in3, 12($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) sb $tmp0,13($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) sb $tmp1,14($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) sb $tmp2,15($mac)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) jr $ra
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) .end poly1305_emit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) .rdata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) .asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) .align 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) ___
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) }}}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
# Write the accumulated assembly text.
#
# The last command-line argument, when present and true, names the output
# file and STDOUT is redirected to it; otherwise the text goes to the
# inherited STDOUT. Every I/O step is checked so that an unwritable path
# or a failed flush aborts the build instead of silently leaving an empty
# or truncated assembly file behind.
$output = pop;
if ($output) {
    # Three-argument open: the file name is never parsed for mode characters.
    open STDOUT, '>', $output or die "can't open $output: $!";
}
print $code or die "error writing output: $!";
# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";