^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Fast SHA-256 implementation for SPE instruction set (PPC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * This code makes use of the SPE SIMD instruction set as defined in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * http://cache.freescale.com/files/32bit/doc/ref_manual/SPEPIM.pdf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Implementation is based on optimization guide notes from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * http://cache.freescale.com/files/32bit/doc/app_note/AN2665.pdf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <asm/ppc_asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <asm/asm-offsets.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
/*
 * Register aliases used by every macro in this file.
 *
 * rHP (r3) and rWP (r4) are used as pointers from the first instruction on,
 * and r5 is moved to CTR as the block count before it is reused as rH0.
 * r14-r23 (rW0-rW7, rT0, rT1) are saved and restored as 64 bit values by
 * INITIALIZE/FINALIZE, r24/r25 (rKP, rT3) as 32 bit values. r0 (rT2) is
 * not saved.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #define rHP r3 /* pointer to the 8 hash words in memory */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #define rKP r24 /* pointer to round constants (PPC_SPE_SHA256_K) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #define rWP r4 /* pointer to input data */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #define rH0 r5 /* 8 hash words, one per 32 bit register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #define rH1 r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define rH2 r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define rH3 r8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define rH4 r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define rH5 r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #define rH6 r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #define rH7 r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define rW0 r14 /* 16 schedule words, two per 64 bit register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define rW1 r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #define rW2 r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define rW3 r17
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #define rW4 r18
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #define rW5 r19
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #define rW6 r20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #define rW7 r21
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #define rT0 r22 /* 64 bit temporaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #define rT1 r23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define rT2 r0 /* 32 bit temporaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #define rT3 r25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
/*
 * Hooks pasted into R_CALC_W through CMP_K##k##_LOOP: k = N expands to
 * nothing, k = C compares rT1 with zero. C is only passed by the last
 * R_CALC_W of the 16 round loop, and the "bt gt" right after that macro
 * consumes the result. Where the hook expands, rT1 still holds the round
 * constants fetched by evldw, so the loop is steered by the sign of a table
 * entry rather than by a counter.
 * NOTE(review): cmpwi presumably sees the word loaded from off+4, i.e. the
 * last constant of each group of 16 (0x14292967, 0x106aa070, then the
 * negative 0xc67178f2 that ends the loop) - confirm against the SPE manual.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #define CMP_KN_LOOP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #define CMP_KC_LOOP \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) cmpwi rT1,0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
/*
 * Prologue: open a 128 byte stack frame and spill the registers that
 * FINALIZE restores. r14-r23 go to 8..80(r1) as 64 bit values (evstdw),
 * r24 and r25 go to 88(r1) and 92(r1) as 32 bit words (stw).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #define INITIALIZE \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) stwu r1,-128(r1); /* create stack frame */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) evstdw r14,8(r1); /* We must save non volatile */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) evstdw r15,16(r1); /* registers. Take the chance */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) evstdw r16,24(r1); /* and save the SPE part too */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) evstdw r17,32(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) evstdw r18,40(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) evstdw r19,48(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) evstdw r20,56(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) evstdw r21,64(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) evstdw r22,72(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) evstdw r23,80(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) stw r24,88(r1); /* save 32 bit only registers */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) stw r25,92(r1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
/*
 * Epilogue: reload r14-r25 from the frame written by INITIALIZE, then store
 * zero to the first word of each 64 bit save slot (8..80(r1)) and release
 * the frame. The second word of each slot and the 32 bit slots at 88(r1)
 * and 92(r1) are left as they are; the in-line comments below give the
 * original author's reasoning. r0 is zero afterwards.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) #define FINALIZE \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) evldw r14,8(r1); /* restore SPE registers */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) evldw r15,16(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) evldw r16,24(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) evldw r17,32(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) evldw r18,40(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) evldw r19,48(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) evldw r20,56(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) evldw r21,64(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) evldw r22,72(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) evldw r23,80(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) lwz r24,88(r1); /* restore normal registers */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) lwz r25,92(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) xor r0,r0,r0; /* r0 = 0, value used for the scrub */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) stw r0,8(r1); /* Delete sensitive data */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) stw r0,16(r1); /* that we might have pushed */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) stw r0,24(r1); /* from other context that runs */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) stw r0,32(r1); /* the same code. Assume that */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) stw r0,40(r1); /* the lower part of the GPRs */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) stw r0,48(r1); /* was already overwritten on */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) stw r0,56(r1); /* the way down to here */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) stw r0,64(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) stw r0,72(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) stw r0,80(r1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) addi r1,r1,128; /* cleanup stack frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88)
/*
 * LOAD_DATA(reg, off) fetches one 32 bit input word into reg; NEXT_BLOCK
 * moves rWP on to the following 64 byte block.
 *
 * With __BIG_ENDIAN__ the word is read with lwz at off(rWP), rWP stays put
 * during a block and NEXT_BLOCK adds 64. Otherwise the word is read with
 * lwbrx at rWP, off is ignored and rWP is bumped by 4 after every word, so
 * after the 16 loads of a block it already points at the next one and
 * NEXT_BLOCK is empty.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) #ifdef __BIG_ENDIAN__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) #define LOAD_DATA(reg, off) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) lwz reg,off(rWP); /* load word at off */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) #define NEXT_BLOCK \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) addi rWP,rWP,64; /* increment per block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) #define LOAD_DATA(reg, off) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) lwbrx reg,0,rWP; /* byte reversed load, off unused */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) addi rWP,rWP,4; /* increment per word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) #define NEXT_BLOCK /* nothing to do, rWP already advanced */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
/*
 * R_LOAD_W: two SHA-256 rounds whose message words come straight from the
 * input block.
 *
 *   a..h  working variables of the first round. The second round uses the
 *         same registers with every role moved on by one (h acts as a,
 *         a as b, b as c, c as d, d as e, e as f, f as g, g as h), so no
 *         register moves are needed. Callers rotate the argument list by
 *         two positions from one invocation to the next.
 *   w     register that ends up holding both words: the first load is
 *         duplicated by evmergelo before the second load replaces the
 *         low word
 *   off   byte offset of the first word; used for the round constants at
 *         rKP and handed to LOAD_DATA
 *
 * Writes d, h (first round) and c, g (second round). Clobbers rT0-rT3.
 * Comments tagged "1:" / "2:" belong to the first / second round and name
 * the roles of the SHA-256 formulas, not the macro arguments. The two
 * rounds are interleaved, so the statement order must not be changed.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) #define R_LOAD_W(a, b, c, d, e, f, g, h, w, off) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) LOAD_DATA(w, off) /* 1: W */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) rotrwi rT0,e,6; /* 1: S1 = e rotr 6 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) rotrwi rT1,e,11; /* 1: S1' = e rotr 11 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) rotrwi rT2,e,25; /* 1: S1" = e rotr 25 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) xor rT0,rT0,rT1; /* 1: S1 = S1 xor S1' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) and rT3,e,f; /* 1: ch = e and f */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) xor rT0,rT0,rT2; /* 1: S1 = S1 xor S1" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) andc rT1,g,e; /* 1: ch' = ~e and g */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) lwz rT2,off(rKP); /* 1: K */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) xor rT3,rT3,rT1; /* 1: ch = ch xor ch' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) add h,h,rT0; /* 1: temp1 = h + S1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) add rT3,rT3,w; /* 1: temp1' = ch + w */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) rotrwi rT0,a,2; /* 1: S0 = a rotr 2 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) add h,h,rT3; /* 1: temp1 = temp1 + temp1' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) rotrwi rT1,a,13; /* 1: S0' = a rotr 13 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) add h,h,rT2; /* 1: temp1 = temp1 + K */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) xor rT0,rT0,rT1; /* 1: S0 = S0 xor S0' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) add d,d,h; /* 1: d = d + temp1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) xor rT3,rT0,rT3; /* 1: S0 = S0 xor S0" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) evmergelo w,w,w; /* shift W: keep 1st word in w */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) or rT2,a,b; /* 1: maj = a or b */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) and rT1,a,b; /* 1: maj' = a and b */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) and rT2,rT2,c; /* 1: maj = maj and c */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) LOAD_DATA(w, off+4) /* 2: W */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) or rT2,rT1,rT2; /* 1: maj = maj or maj' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) add rT3,rT3,rT2; /* 1: temp2 = S0 + maj */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) add h,h,rT3; /* 1: h = temp1 + temp2 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) and rT3,d,e; /* 2: ch = e and f */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) andc rT1,f,d; /* 2: ch' = ~e and g */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) lwz rT2,off+4(rKP); /* 2: K */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) add g,g,rT0; /* 2: temp1 = h + S1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) add rT3,rT3,w; /* 2: temp1' = ch + w */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) add g,g,rT3; /* 2: temp1 = temp1 + temp1' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) add g,g,rT2; /* 2: temp1 = temp1 + K */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) or rT2,h,a; /* 2: maj = a or b */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) and rT1,h,a; /* 2: maj' = a and b */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) and rT2,rT2,b; /* 2: maj = maj and c */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) add c,c,g; /* 2: d = d + temp1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) add g,g,rT3 /* 2: h = temp1 + temp2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
/*
 * R_CALC_W: two SHA-256 rounds that first extend the message schedule by
 * two words.
 *
 *   a..h    working variables, with the same role rotation between the
 *           first and second round as in R_LOAD_W
 *   w0      holds the w[-16] pair on entry and the two new words on exit
 *   w0,w1   merged with evmergelohi to form the w[-15] pair
 *   w4,w5   merged with evmergelohi to form the w[-7] pair
 *   w7      the w[-2] pair
 *   k       N or C; selects CMP_KN_LOOP or CMP_KC_LOOP
 *   off     byte offset at rKP of the two round constants (one evldw)
 *
 * The ev* instructions work on both words of a pair at once and are
 * interleaved with the scalar instructions of the first round. Rotate
 * rights are coded as evrlwi rotate lefts by 32 - n; for s0 the rotr 18
 * term is obtained by rotating the rotr 7 value left by a further 21.
 * In the first round maj is built as (a and b) + ((a xor b) and c).
 *
 * Writes w0, d, h (first round) and c, g (second round); clobbers rT0-rT3
 * and, for k = C, cr0. The statement order must not be changed.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) #define R_CALC_W(a, b, c, d, e, f, g, h, w0, w1, w4, w5, w7, k, off) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) rotrwi rT2,e,6; /* 1: S1 = e rotr 6 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) evmergelohi rT0,w0,w1; /* w[-15] */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) rotrwi rT3,e,11; /* 1: S1' = e rotr 11 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) evsrwiu rT1,rT0,3; /* s0 = w[-15] >> 3 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) evrlwi rT0,rT0,25; /* s0' = w[-15] rotr 7 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) rotrwi rT3,e,25; /* 1: S1" = e rotr 25 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) evxor rT1,rT1,rT0; /* s0 = s0 xor s0' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) xor rT2,rT2,rT3; /* 1: S1 = S1 xor S1" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) evrlwi rT0,rT0,21; /* s0" = w[-15] rotr 18 (7 + 11) */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) add h,h,rT2; /* 1: temp1 = h + S1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) evxor rT0,rT0,rT1; /* s0 = s0 xor s0" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) and rT2,e,f; /* 1: ch = e and f */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) evaddw w0,w0,rT0; /* w = w[-16] + s0 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) andc rT3,g,e; /* 1: ch' = ~e and g */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) evsrwiu rT0,w7,10; /* s1 = w[-2] >> 10 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) xor rT2,rT2,rT3; /* 1: ch = ch xor ch' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) evrlwi rT1,w7,15; /* s1' = w[-2] rotr 17 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) add h,h,rT2; /* 1: temp1 = temp1 + ch */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) evxor rT0,rT0,rT1; /* s1 = s1 xor s1' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) rotrwi rT2,a,2; /* 1: S0 = a rotr 2 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) evrlwi rT1,w7,13; /* s1" = w[-2] rotr 19 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) rotrwi rT3,a,13; /* 1: S0' = a rotr 13 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) evxor rT0,rT0,rT1; /* s1 = s1 xor s1" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) evldw rT1,off(rKP); /* k: two round constants */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) rotrwi rT3,a,22; /* 1: S0" = a rotr 22 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) evaddw w0,w0,rT0; /* w = w + s1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) xor rT2,rT2,rT3; /* 1: S0 = S0 xor S0" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) evmergelohi rT0,w4,w5; /* w[-7] */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) and rT3,a,b; /* 1: maj = a and b */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) evaddw w0,w0,rT0; /* w = w + w[-7] */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) CMP_K##k##_LOOP \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) add rT2,rT2,rT3; /* 1: temp2 = S0 + maj */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) evaddw rT1,rT1,w0; /* wk = w + k */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) xor rT3,a,b; /* 1: maj' = a xor b */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) evmergehi rT0,rT1,rT1; /* wk1/wk2: wk1 for scalar add */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) and rT3,rT3,c; /* 1: maj' = maj' and c */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) add h,h,rT0; /* 1: temp1 = temp1 + wk */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) add rT2,rT2,rT3; /* 1: temp2 = temp2 + maj' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) add g,g,rT1; /* 2: temp1 = temp1 + wk */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) add d,d,h; /* 1: d = d + temp1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) rotrwi rT0,d,6; /* 2: S1 = e rotr 6 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) add h,h,rT2; /* 1: h = temp1 + temp2 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) rotrwi rT1,d,11; /* 2: S1' = e rotr 11 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) rotrwi rT2,d,25; /* 2: S1" = e rotr 25 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) xor rT0,rT0,rT1; /* 2: S1 = S1 xor S1' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) and rT3,d,e; /* 2: ch = e and f */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) xor rT0,rT0,rT2; /* 2: S1 = S1 xor S1" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) andc rT1,f,d; /* 2: ch' = ~e and g */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) add g,g,rT0; /* 2: temp1 = h + S1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) xor rT3,rT3,rT1; /* 2: ch = ch xor ch' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) rotrwi rT0,h,2; /* 2: S0 = a rotr 2 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) add g,g,rT3; /* 2: temp1 = temp1 + ch */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) rotrwi rT1,h,13; /* 2: S0' = a rotr 13 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) rotrwi rT3,h,22; /* 2: S0" = a rotr 22 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) xor rT0,rT0,rT1; /* 2: S0 = S0 xor S0' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) or rT2,h,a; /* 2: maj = a or b */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) and rT1,h,a; /* 2: maj' = a and b */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) and rT2,rT2,b; /* 2: maj = maj and c */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) xor rT3,rT0,rT3; /* 2: S0 = S0 xor S0" */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) or rT2,rT1,rT2; /* 2: maj = maj or maj' */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) add c,c,g; /* 2: d = d + temp1 */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) add rT3,rT3,rT2; /* 2: temp2 = S0 + maj */ \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) add g,g,rT3 /* 2: h = temp1 + temp2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
/*
 * ppc_spe_sha256_transform - run the SHA-256 compression function over a
 * number of consecutive 64 byte blocks.
 *
 * In:   r3 (rHP)  pointer to the eight 32 bit hash words, updated in place
 *       r4 (rWP)  pointer to the input data
 *       r5        number of 64 byte blocks to process
 * Out:  hash words at rHP hold the new state
 *
 * A block count of zero returns at once, before any frame is created or
 * memory is touched. Without that check the loop below, which tests its
 * counter only at the bottom (bdnz), would hash one block regardless and
 * then wrap CTR around to 0xffffffff.
 *
 * Clobbers r0, r4-r12, cr0 and CTR. r14-r25 are saved by INITIALIZE and
 * restored by FINALIZE.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) _GLOBAL(ppc_spe_sha256_transform)
cmplwi r5,0 /* no blocks requested? */
beqlr /* then return, nothing to hash */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) INITIALIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) mtctr r5 /* CTR = block count, taken before r5 becomes rH0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) lwz rH0,0(rHP) /* working variables = current hash words */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) lwz rH1,4(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) lwz rH2,8(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) lwz rH3,12(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) lwz rH4,16(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) lwz rH5,20(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) lwz rH6,24(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) lwz rH7,28(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) ppc_spe_sha256_main: /* once per 64 byte block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) lis rKP,PPC_SPE_SHA256_K@ha /* rKP = start of the constant table */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) addi rKP,rKP,PPC_SPE_SHA256_K@l
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) /* rounds 0-15: message words are read from the input, two per macro */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW0, 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW1, 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW2, 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW3, 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) R_LOAD_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7, rW4, 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) R_LOAD_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5, rW5, 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) R_LOAD_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3, rW6, 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) R_LOAD_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1, rW7, 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) ppc_spe_sha256_16_rounds: /* 16 rounds per pass, schedule computed on the fly */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) addi rKP,rKP,64 /* next 16 round constants */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) rW0, rW1, rW4, rW5, rW7, N, 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) rW1, rW2, rW5, rW6, rW0, N, 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) rW2, rW3, rW6, rW7, rW1, N, 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) rW3, rW4, rW7, rW0, rW2, N, 24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) R_CALC_W(rH0, rH1, rH2, rH3, rH4, rH5, rH6, rH7,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) rW4, rW5, rW0, rW1, rW3, N, 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) R_CALC_W(rH6, rH7, rH0, rH1, rH2, rH3, rH4, rH5,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) rW5, rW6, rW1, rW2, rW4, N, 40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) R_CALC_W(rH4, rH5, rH6, rH7, rH0, rH1, rH2, rH3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) rW6, rW7, rW2, rW3, rW5, N, 48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) R_CALC_W(rH2, rH3, rH4, rH5, rH6, rH7, rH0, rH1,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) rW7, rW0, rW3, rW4, rW6, C, 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) bt gt,ppc_spe_sha256_16_rounds /* cr0 was set by CMP_KC_LOOP above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) /* feed forward: add the previous hash words (rW0-rW7 as scratch) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) lwz rW0,0(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) NEXT_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) lwz rW1,4(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) lwz rW2,8(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) lwz rW3,12(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) lwz rW4,16(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) lwz rW5,20(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) lwz rW6,24(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) lwz rW7,28(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) add rH0,rH0,rW0 /* rH0-rH7 stay live as input for the next block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) stw rH0,0(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) add rH1,rH1,rW1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) stw rH1,4(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) add rH2,rH2,rW2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) stw rH2,8(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) add rH3,rH3,rW3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) stw rH3,12(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) add rH4,rH4,rW4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) stw rH4,16(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) add rH5,rH5,rW5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) stw rH5,20(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) add rH6,rH6,rW6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) stw rH6,24(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) add rH7,rH7,rW7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) stw rH7,28(rHP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) bdnz ppc_spe_sha256_main /* next block while CTR is not exhausted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) FINALIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299)
/*
 * SHA-256 round constants K[0..63], four per line. The table is only ever
 * read (lwz/evldw through rKP) and its label is local to this file, so it
 * is placed in read-only data instead of the writable .data section.
 * .align 5 is kept from the original; the 64 bit evldw loads depend on the
 * table starting on a doubleword boundary.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) .section .rodata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) PPC_SPE_SHA256_K:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2