^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * x86_64/AVX2 assembler optimized version of Serpent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Based on AVX assembler implementation of Serpent by:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Copyright © 2012 Johannes Goetzfried
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #include <asm/frame.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include "glue_helper-asm-avx2.S"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) .file "serpent-avx2-asm_64.S"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
/*
 * .Lbswap128_mask: 16-byte constant holding the byte indices 15, 14, ..., 0.
 * It lives in its own read-only section flagged allocatable + mergeable
 * ("aM") with an entity size of 16 and is aligned to 16 bytes.
 * NOTE(review): by its name and contents this is presumably a vpshufb
 * control that reverses the byte order of a 128-bit value; the use site is
 * not in this part of the file - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) .section .rodata.cst16.bswap128_mask, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) .Lbswap128_mask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
/*
 * .Lxts_gf128mul_and_shl1_mask_0: 16-byte, 16-byte-aligned constant in its
 * own mergeable read-only section. Read as two little-endian 64-bit words
 * it is { 0x87, 0x01 }.
 * NOTE(review): presumably the AND mask used when multiplying an XTS tweak
 * by x in GF(2^128) (0x87 being the reduction constant); the consumer is
 * not visible in this part of the file - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) .section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) .Lxts_gf128mul_and_shl1_mask_0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
/*
 * .Lxts_gf128mul_and_shl1_mask_1: 16-byte, 16-byte-aligned constant in its
 * own mergeable read-only section. Read as two little-endian 64-bit words
 * it is { 0x10e, 0x02 }, i.e. each word of .Lxts_gf128mul_and_shl1_mask_0
 * (0x87, 0x01) shifted left by one bit.
 * NOTE(review): presumably the mask for a second, combined tweak-doubling
 * step; the consumer is not visible in this part of the file - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) .section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) .Lxts_gf128mul_and_shl1_mask_1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
/*
 * CTX: base register that get_key() indexes to fetch 32-bit key words.
 * NOTE(review): %rdi is the first integer argument register in the SysV
 * AMD64 ABI, so this is presumably the cipher context pointer passed by
 * the caller - confirm against the entry points.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #define CTX %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
/*
 * RNOT: register XORed into values by the S-box macros.
 *       NOTE(review): presumably preloaded with all-ones so the XOR acts
 *       as a bitwise NOT; its initialisation is not in this part of the
 *       file - confirm.
 * tp:   scratch register shared by all S-box macros (single copy, so it is
 *       overwritten by every S-box expansion).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #define RNOT %ymm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #define tp %ymm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
/*
 * Working registers: five names (RA..RE), each existing in two copies with
 * suffix 1 and 2. K2/LK2/KL2 receive the un-suffixed name (e.g. RA) and
 * token-paste the suffix, so every step is applied to both copies.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define RA1 %ymm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #define RA2 %ymm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #define RB1 %ymm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #define RB2 %ymm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #define RC1 %ymm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #define RC2 %ymm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #define RD1 %ymm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #define RD2 %ymm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #define RE1 %ymm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) #define RE2 %ymm11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
/*
 * RK0..RK3: hold the four key words of the current group; each is filled
 * by get_key() with one 32-bit word broadcast to every lane.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #define RK0 %ymm12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #define RK1 %ymm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #define RK2 %ymm14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #define RK3 %ymm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
/*
 * RK0x..RK3x: 128-bit (xmm) names for the low halves of RK0..RK3
 * (%xmm12..%xmm15 alias the low 128 bits of %ymm12..%ymm15).
 * They are not referenced in this part of the file.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #define RK0x %xmm12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #define RK1x %xmm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #define RK2x %xmm14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #define RK3x %xmm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
/*
 * S0_1 / S0_2: first and second half of the S0 step, written purely as
 * vpand/vpor/vpxor on whole registers (AT&T order: last operand is the
 * destination). NOTE(review): by name, Serpent S-box 0 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in S0_1 (incoming value is never read), then
 *           read by S0_2
 *   tp      scratch; written and consumed inside S0_1 only
 *   RNOT    XORed in to complement a value (presumably all-ones)
 * S0_2 depends on the register state left by S0_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #define S0_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) vpor x0, x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) vpxor x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) vpxor x2, x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) vpxor RNOT, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) vpxor x1, tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) vpand x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) vpxor x4, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) vpxor x0, x2, x2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) #define S0_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) vpxor x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) vpor x0, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) vpand x1, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) vpxor x2, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) vpxor RNOT, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) vpxor x4, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) vpxor x2, x1, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
/*
 * S1_1 / S1_2: first and second half of the S1 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, Serpent S-box 1 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in S1_1 (incoming value is never read), then
 *           read by S1_2
 *   tp      scratch; written and consumed inside S1_1 only
 *   RNOT    XORed in to complement a value (presumably all-ones)
 * S1_2 depends on the register state left by S1_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) #define S1_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) vpxor x0, x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) vpxor x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) vpxor RNOT, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) vpand tp, x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) vpor tp, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) vpxor x2, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) vpxor x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) vpxor x3, tp, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) #define S1_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) vpxor x4, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) vpor x4, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) vpxor x2, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) vpand x0, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) vpxor x1, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) vpor x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) vpxor RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) vpxor x1, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99)
/*
 * S2_1 / S2_2: first and second half of the S2 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, Serpent S-box 2 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      untouched by S2_1; first written in S2_2 (incoming value is
 *           never read)
 *   tp      written in S2_1 and still live in S2_2, so nothing may
 *           overwrite tp between the two halves
 *   RNOT    XORed in to complement a value (presumably all-ones)
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) #define S2_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) vpxor RNOT, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) vpxor x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) vpand x2, x0, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) vpxor x3, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) vpor x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) vpxor x1, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) vpxor x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) vpand tp, x1, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) #define S2_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) vpxor x2, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) vpand x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) vpor x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) vpxor RNOT, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) vpxor tp, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) vpxor tp, x0, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) vpxor x2, tp, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) vpor x2, x1, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
/*
 * S3_1 / S3_2: first and second half of the S3 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, Serpent S-box 3 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in S3_1 (incoming value is never read), then
 *           read by S3_2
 *   tp      scratch; written and consumed inside S3_1 only
 * RNOT is not used by this pair.
 * S3_2 depends on the register state left by S3_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) #define S3_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) vpxor x3, x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) vpor x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) vpand x0, x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) vpxor tp, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) vpand x3, tp, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) vpor x4, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) vpxor x3, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) #define S3_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) vpxor x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) vpand x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) vpand x4, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) vpxor x2, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) vpor x1, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) vpand x1, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) vpxor x3, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) vpxor x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) vpxor x2, x3, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
/*
 * S4_1 / S4_2: first and second half of the S4 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, Serpent S-box 4 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in S4_1 (incoming value is never read), then
 *           read by S4_2
 *   tp      written in S4_1 and still read in S4_2, so nothing may
 *           overwrite tp between the two halves
 *   RNOT    XORed in to complement a value (presumably all-ones)
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) #define S4_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) vpand x0, x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) vpxor x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) vpxor x2, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) vpor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) vpxor x1, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) vpxor tp, x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) vpor x0, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) vpxor x1, x2, x2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) #define S4_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) vpand x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) vpxor x4, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) vpand x2, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) vpxor tp, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) vpxor x0, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) vpor x1, tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) vpxor RNOT, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) vpxor x0, x3, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
/*
 * S5_1 / S5_2: first and second half of the S5 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, Serpent S-box 5 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in S5_1 (incoming value is never read), then
 *           read by S5_2
 *   tp      scratch; written and consumed inside S5_1 only
 *   RNOT    XORed in to complement a value (presumably all-ones)
 * S5_2 depends on the register state left by S5_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) #define S5_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) vpor x0, x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) vpxor tp, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) vpxor RNOT, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) vpxor x0, x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) vpand x4, tp, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) vpor x3, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) vpxor x0, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) #define S5_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) vpand x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) vpxor x3, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) vpxor x2, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) vpxor x1, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) vpand x4, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) vpxor x2, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) vpand x0, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) vpxor x2, x3, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
/*
 * S6_1 / S6_2: first and second half of the S6 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, Serpent S-box 6 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in S6_1 (incoming value is never read), then
 *           read by S6_2
 *   tp      scratch; written and consumed inside S6_1 only
 *   RNOT    XORed in to complement a value (presumably all-ones)
 * S6_2 depends on the register state left by S6_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) #define S6_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) vpxor x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) vpxor x2, x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) vpxor x0, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) vpand x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) vpor x3, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) vpxor RNOT, x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) vpxor tp, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) vpxor x2, tp, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) #define S6_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) vpxor x4, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) vpxor x0, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) vpand x0, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) vpxor x1, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) vpand x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) vpxor x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) vpxor x2, x1, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
/*
 * S7_1 / S7_2: first and second half of the S7 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, Serpent S-box 7 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in S7_1 (incoming value is never read), then
 *           read by S7_2
 *   tp      scratch; written and consumed inside S7_1 only
 *   RNOT    XORed in to complement a value (presumably all-ones)
 * S7_2 depends on the register state left by S7_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) #define S7_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) vpxor RNOT, x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) vpxor RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) vpand x2, tp, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) vpxor x3, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) vpor tp, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) vpxor x2, tp, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) vpxor x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) vpor x1, x0, x0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) #define S7_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) vpand x0, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) vpxor x4, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) vpxor x3, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) vpand x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) vpxor x1, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) vpxor x4, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) vpxor x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) vpor x0, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) vpxor x1, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217)
/*
 * SI0_1 / SI0_2: first and second half of the SI0 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, the inverse of Serpent S-box 0 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in SI0_1 (incoming value is never read), then
 *           read by SI0_2
 *   tp      scratch; written and consumed inside SI0_1 only
 *   RNOT    XORed in to complement a value (presumably all-ones)
 * SI0_2 depends on the register state left by SI0_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) #define SI0_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) vpxor x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) vpor x1, x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) vpxor x1, x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) vpxor RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) vpxor tp, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) vpxor x0, tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) vpand x1, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) vpxor x2, x0, x0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) #define SI0_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) vpand x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) vpxor x4, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) vpxor x3, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) vpand x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) vpxor x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) vpxor x3, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
/*
 * SI1_1 / SI1_2: first and second half of the SI1 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, the inverse of Serpent S-box 1 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in SI1_1 (incoming value is never read), then
 *           read by SI1_2
 *   tp      written in SI1_1 and still read in SI1_2, so nothing may
 *           overwrite tp between the two halves
 *   RNOT    XORed in to complement a value (presumably all-ones)
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) #define SI1_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) vpxor x3, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) vpxor x2, x0, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) vpxor RNOT, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) vpor x1, x0, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) vpxor x3, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) vpand x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) vpxor x2, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) vpand x4, x2, x2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) #define SI1_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) vpxor x1, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) vpor x3, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) vpxor tp, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) vpxor tp, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) vpor x4, tp, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) vpxor x4, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) vpxor x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) vpxor x1, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255)
/*
 * SI2_1 / SI2_2: first and second half of the SI2 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, the inverse of Serpent S-box 2 - confirm.
 *   x0      only read by this pair, never written
 *   x1..x3  working registers, read and overwritten
 *   x4      first written in SI2_1 (incoming value is never read), then
 *           read by SI2_2
 *   tp      scratch; written and consumed inside SI2_1 only
 *   RNOT    XORed in to complement a value (presumably all-ones)
 * SI2_2 depends on the register state left by SI2_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) #define SI2_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) vpxor x1, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) vpxor RNOT, x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) vpor x2, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) vpxor x0, x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) vpxor x1, tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) vpor x2, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) vpxor x0, x2, x2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) #define SI2_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) vpxor x4, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) vpor x3, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) vpxor x2, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) vpand x1, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) vpxor x4, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) vpxor x0, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274)
/*
 * SI3_1 / SI3_2: first and second half of the SI3 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, the inverse of Serpent S-box 3 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in SI3_1 (incoming value is never read), then
 *           read by SI3_2
 *   tp      scratch; written and consumed inside SI3_1 only
 * RNOT is not used by this pair.
 * SI3_2 depends on the register state left by SI3_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) #define SI3_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) vpxor x1, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) vpand x2, x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) vpxor x0, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) vpor x1, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) vpxor x3, x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) vpxor x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) vpor tp, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) vpxor x2, tp, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) #define SI3_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) vpxor x3, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) vpand x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) vpxor x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) vpand x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) vpxor x3, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) vpxor x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) vpxor x1, x0, x0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
/*
 * SI4_1 / SI4_2: first and second half of the SI4 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, the inverse of Serpent S-box 4 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in SI4_1 (incoming value is never read), then
 *           read by SI4_2
 *   tp      scratch; written and consumed inside SI4_1 only
 *   RNOT    XORed in to complement a value (presumably all-ones)
 * SI4_2 depends on the register state left by SI4_1 for the same arguments.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) #define SI4_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) vpand x1, x0, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) vpxor x2, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) vpor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) vpxor RNOT, x0, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) vpxor tp, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) vpxor x2, tp, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) vpand x4, x2, x2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) #define SI4_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) vpxor x0, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) vpor x4, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) vpxor x3, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) vpand x2, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) vpxor x3, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) vpxor x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) vpand x0, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) vpxor x1, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) vpxor x3, x0, x0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314)
/*
 * SI5_1 / SI5_2: first and second half of the SI5 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, the inverse of Serpent S-box 5 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      untouched by SI5_1; first written in SI5_2 (incoming value is
 *           never read)
 *   tp      written in SI5_1 and still read in SI5_2, so nothing may
 *           overwrite tp between the two halves
 *   RNOT    XORed in to complement a value (presumably all-ones)
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) #define SI5_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) vpor x2, x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) vpxor x1, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) vpxor x3, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) vpand x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) vpor x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) vpxor RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) vpxor x2, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) vpor x0, x2, x2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) #define SI5_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) vpxor tp, x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) vpxor x4, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) vpand x0, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) vpxor tp, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) vpxor x3, tp, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) vpand x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) vpxor x4, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) vpxor x3, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336)
/*
 * SI6_1 / SI6_2: first and second half of the SI6 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, the inverse of Serpent S-box 6 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      untouched by SI6_1; first written in SI6_2 (incoming value is
 *           never read)
 *   tp      written in SI6_1, then complemented and read in SI6_2, so
 *           nothing may overwrite tp between the two halves
 *   RNOT    XORed in to complement a value (presumably all-ones)
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) #define SI6_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) vpand x3, x0, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) vpxor x2, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) vpxor x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) vpor x0, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) vpxor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) vpand tp, x3, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) #define SI6_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) vpxor RNOT, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) vpxor x1, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) vpand x2, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) vpxor tp, x0, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) vpxor x4, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) vpxor x2, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) vpxor x1, tp, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) vpxor x0, x2, x2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
/*
 * SI7_1 / SI7_2: first and second half of the SI7 step, built only from
 * vpand/vpor/vpxor (AT&T order: last operand is the destination).
 * NOTE(review): by name, the inverse of Serpent S-box 7 - confirm.
 *   x0..x3  working registers, read and overwritten
 *   x4      first written in SI7_1 (incoming value is never read), then
 *           read by SI7_2
 *   tp      written in SI7_1 and still read in SI7_2, so nothing may
 *           overwrite tp between the two halves
 *   RNOT    XORed in to complement a value (presumably all-ones)
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) #define SI7_1(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) vpand x0, x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) vpxor x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) vpor x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) vpxor x1, x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) vpxor RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) vpor tp, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) vpxor x0, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) vpand x2, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) vpxor x1, x0, x0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) #define SI7_2(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) vpand x2, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) vpxor x2, tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) vpxor x3, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) vpand x3, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) vpor x0, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) vpxor x4, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) vpxor x4, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) vpand x0, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) vpxor x2, x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376)
/*
 * get_key(i, j, t): load the 32-bit word at byte offset (4*i + j)*4 from
 * CTX and broadcast it to every 32-bit lane of register t.
 *   i  index of a group of four consecutive 32-bit words
 *   j  word within that group (callers in this file pass 0..3)
 *   t  destination vector register
 * NOTE(review): presumably word j of round key i in the expanded key that
 * CTX points to - confirm against the context layout.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) #define get_key(i,j,t) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) vpbroadcastd (4*(i)+(j))*4(CTX), t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379)
/*
 * K2(x0, x1, x2, x3, x4, i): XOR the four key words of group i into both
 * register sets.
 *   - loads words 0..3 of group i into RK0..RK3 via get_key()
 *   - x0..x3 are un-suffixed register names; "1" and "2" are token-pasted
 *     on, so xN1 ^= RKN and xN2 ^= RKN for N = 0..3
 *   - x4 is accepted for a uniform macro signature but not used here
 * Clobbers RK0..RK3.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) #define K2(x0, x1, x2, x3, x4, i) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) get_key(i, 0, RK0); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) get_key(i, 1, RK1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) get_key(i, 2, RK2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) get_key(i, 3, RK3); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) vpxor RK0, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) vpxor RK1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) vpxor RK2, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) vpxor RK3, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) vpxor RK0, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) vpxor RK1, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) vpxor RK2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) vpxor RK3, x3 ## 2, x3 ## 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393)
/*
 * LK2(x0, x1, x2, x3, x4, i): for both register sets (suffixes 1 and 2 are
 * token-pasted onto each argument) apply the following to every 32-bit
 * lane, then XOR in the key words of group i:
 *
 *   x0 = rol(x0, 13);  x2 = rol(x2, 3);   x1 ^= x0 ^ x2;
 *   x1 = rol(x1, 1);   x3 ^= x2 ^ shl(x0, 3);
 *   x3 = rol(x3, 7);   x0 ^= x1 ^ x3;     x2 ^= x3 ^ shl(x1, 7);
 *   x1 ^= RK1;         x3 ^= RK3;
 *   x0 = rol(x0, 5);   x2 = rol(x2, 22);
 *   x0 ^= RK0;         x2 ^= RK2;
 *
 * Each rotate is a vpslld / vpsrld / vpor triple using x4 as the temporary,
 * so x4 is clobbered. The four get_key() loads for group i are placed
 * between the arithmetic steps of the two sets rather than up front;
 * RK0..RK3 are overwritten.
 * NOTE(review): the sequence has the shape of Serpent's linear
 * transformation followed by round-key mixing - confirm against the
 * cipher specification before relying on that description.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) #define LK2(x0, x1, x2, x3, x4, i) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) vpslld $13, x0 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) vpsrld $(32 - 13), x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) vpor x4 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) vpslld $3, x2 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) vpsrld $(32 - 3), x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) vpor x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) vpslld $13, x0 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) vpsrld $(32 - 13), x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) vpor x4 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) vpslld $3, x2 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) vpsrld $(32 - 3), x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) vpor x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) vpslld $1, x1 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) vpsrld $(32 - 1), x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) vpor x4 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) vpslld $3, x0 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) get_key(i, 1, RK1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) vpslld $1, x1 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) vpsrld $(32 - 1), x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) vpor x4 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) vpslld $3, x0 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) get_key(i, 3, RK3); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) vpslld $7, x3 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) vpsrld $(32 - 7), x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) vpor x4 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) vpslld $7, x1 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) get_key(i, 0, RK0); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) vpslld $7, x3 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) vpsrld $(32 - 7), x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) vpor x4 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) vpslld $7, x1 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) get_key(i, 2, RK2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) vpxor RK1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) vpxor RK3, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) vpslld $5, x0 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) vpsrld $(32 - 5), x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) vpor x4 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) vpslld $22, x2 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) vpsrld $(32 - 22), x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) vpor x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) vpxor RK0, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) vpxor RK2, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) vpxor RK1, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) vpxor RK3, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) vpslld $5, x0 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) vpsrld $(32 - 5), x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) vpor x4 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) vpslld $22, x2 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) vpsrld $(32 - 22), x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) vpor x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) vpxor RK0, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) vpxor RK2, x2 ## 2, x2 ## 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463)
/*
 * KL2(x0, x1, x2, x3, x4, i): key mixing followed by the rotate/xor network
 * used on the decryption path, applied to both register sets (the "1" and
 * "2" suffixes are pasted onto every register-name argument).
 *
 *  - RK0..RK3 are XORed into x0..x3 first.  KL2 does not load them itself;
 *    in __serpent_dec_blk16 the preceding SP() issues the get_key() loads.
 *  - Every vpsrld/vpslld/vpor triple is a 32-bit rotate right (by 5, 22, 1,
 *    7, 13 and 3 bits).  The lone "vpslld $7" and "vpslld $3" are plain
 *    left shifts whose result is XORed into x2 and x3 respectively.
 *  - x4 is scratch and is clobbered.  The `i` argument is not referenced
 *    in the body.
 *
 * The opening steps (xor RK0/RK2, ror 5, xor RK3/RK1, ror 22) undo, in
 * reverse order, the closing steps of LK2 above.  Work on the two register
 * sets is interleaved in short runs -- presumably so independent dependency
 * chains can overlap; do not reorder without re-checking data dependencies.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) #define KL2(x0, x1, x2, x3, x4, i) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) vpxor RK0, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) vpxor RK2, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) vpsrld $5, x0 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) vpslld $(32 - 5), x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) vpor x4 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) vpxor RK3, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) vpxor RK1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) vpsrld $22, x2 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) vpslld $(32 - 22), x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) vpor x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) vpxor x3 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) vpxor RK0, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) vpxor RK2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) vpsrld $5, x0 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) vpslld $(32 - 5), x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) vpor x4 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) vpxor RK3, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) vpxor RK1, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) vpsrld $22, x2 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) vpslld $(32 - 22), x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) vpor x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) vpxor x3 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) vpxor x3 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) vpslld $7, x1 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) vpxor x1 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) vpxor x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) vpsrld $1, x1 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) vpslld $(32 - 1), x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) vpor x4 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) vpxor x3 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) vpslld $7, x1 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) vpxor x1 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) vpxor x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) vpsrld $1, x1 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) vpslld $(32 - 1), x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) vpor x4 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) vpsrld $7, x3 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) vpslld $(32 - 7), x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) vpor x4 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) vpxor x0 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) vpslld $3, x0 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) vpxor x4 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) vpsrld $7, x3 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) vpslld $(32 - 7), x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) vpor x4 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) vpxor x0 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) vpslld $3, x0 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) vpxor x4 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) vpsrld $13, x0 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) vpslld $(32 - 13), x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) vpor x4 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) vpxor x2 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) vpxor x2 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) vpsrld $3, x2 ## 1, x4 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) vpslld $(32 - 3), x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) vpor x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) vpsrld $13, x0 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) vpslld $(32 - 13), x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) vpor x4 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) vpxor x2 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) vpxor x2 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) vpsrld $3, x2 ## 2, x4 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) vpslld $(32 - 3), x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) vpor x4 ## 2, x2 ## 2, x2 ## 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529)
/*
 * S(SBOX, x0..x4): apply one S-box to both register sets.
 *
 * SBOX is a name prefix (e.g. S0, SI7); the S-box is split into two macro
 * halves, SBOX_1 and SBOX_2, which are run back to back on the "1" set and
 * then on the "2" set.  Unlike SP(), no round-key loads are issued here.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) #define S(SBOX, x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535)
/*
 * SP(SBOX, x0..x4, i): same S-box application as S(), with the four
 * get_key() loads for round i slotted between the S-box halves.
 *
 * The keys are fetched in the order RK0, RK2, RK3, RK1, so RK0..RK3 are all
 * populated by the time the macro ends; KL2() then consumes them.
 * get_key() is defined outside this chunk -- presumably it loads word n of
 * round key i into the given register (confirm at its definition).
 *
 * NOTE: the last line ends with a continuation backslash, so the blank line
 * that follows it is what terminates the macro.  Keep that blank line.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) #define SP(SBOX, x0, x1, x2, x3, x4, i) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) get_key(i, 0, RK0); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) get_key(i, 2, RK2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) get_key(i, 3, RK3); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) get_key(i, 1, RK1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545)
/*
 * transpose_4x4(x0, x1, x2, x3, t0, t1, t2): in-place transpose of a 4x4
 * matrix of 32-bit elements whose rows are x0..x3.
 *
 * Stage 1 interleaves dwords of (x0,x1) and (x2,x3); stage 2 interleaves
 * the resulting qwords.  The unpack instructions work independently on each
 * 128-bit lane, so with ymm operands two matrices are transposed at once.
 *
 * Clobbers t0, t1, t2.  x3 doubles as the fourth temporary: it is
 * overwritten at the end of stage 1, which is safe because its original
 * value is not needed afterwards.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) vpunpckldq x1, x0, t0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) vpunpckhdq x1, x0, t2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) vpunpckldq x3, x2, t1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) vpunpckhdq x3, x2, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) vpunpcklqdq t1, t0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) vpunpckhqdq t1, t0, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) vpunpcklqdq x3, t2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) vpunpckhqdq x3, t2, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556)
/*
 * read_blocks(): convert freshly loaded blocks into the word-sliced layout
 * the round macros operate on.  This is just transpose_4x4(); t0-t2 are
 * clobbered.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) #define read_blocks(x0, x1, x2, x3, t0, t1, t2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559)
/*
 * write_blocks(): convert the word-sliced state back to block layout before
 * it is stored.  Identical to read_blocks() (a second transpose_4x4()
 * restores the original arrangement); t0-t2 are clobbered.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562)
/*
 * __serpent_enc_blk16: file-local core that encrypts the blocks held in the
 * RA..RD register pairs.
 *
 * Structure: transpose in, K2 with key 0, then 32 rounds of "S-box followed
 * by LK2" cycling through S0..S7 (the final round uses K2 with key 32 in
 * place of LK2), transpose out.  K2 and LK2 are defined outside this chunk.
 *
 * The register-name arguments are permuted from round to round instead of
 * moving data: each round's argument order picks up the previous round's
 * results wherever they were left.  The table below must therefore be kept
 * exactly as is.
 *
 * Clobbers: RNOT, RK0-RK3 and the RE pair in addition to the data registers.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) .align 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) /* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) * %rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571)
/* RNOT = all ones (a register always compares equal to itself). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) vpcmpeqd RNOT, RNOT, RNOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
/* RK0-RK2 are free at this point and serve as transpose scratch. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576)
/* Initial key mixing, then rounds 0..31 (last macro argument = key index). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) K2(RA, RB, RC, RD, RE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) S(S0, RA, RB, RC, RD, RE); LK2(RC, RB, RD, RA, RE, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) S(S1, RC, RB, RD, RA, RE); LK2(RE, RD, RA, RC, RB, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) S(S2, RE, RD, RA, RC, RB); LK2(RB, RD, RE, RC, RA, 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) S(S3, RB, RD, RE, RC, RA); LK2(RC, RA, RD, RB, RE, 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) S(S4, RC, RA, RD, RB, RE); LK2(RA, RD, RB, RE, RC, 5);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) S(S5, RA, RD, RB, RE, RC); LK2(RC, RA, RD, RE, RB, 6);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) S(S6, RC, RA, RD, RE, RB); LK2(RD, RB, RA, RE, RC, 7);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) S(S7, RD, RB, RA, RE, RC); LK2(RC, RA, RE, RD, RB, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) S(S0, RC, RA, RE, RD, RB); LK2(RE, RA, RD, RC, RB, 9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) S(S1, RE, RA, RD, RC, RB); LK2(RB, RD, RC, RE, RA, 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) S(S2, RB, RD, RC, RE, RA); LK2(RA, RD, RB, RE, RC, 11);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) S(S3, RA, RD, RB, RE, RC); LK2(RE, RC, RD, RA, RB, 12);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) S(S4, RE, RC, RD, RA, RB); LK2(RC, RD, RA, RB, RE, 13);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) S(S5, RC, RD, RA, RB, RE); LK2(RE, RC, RD, RB, RA, 14);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) S(S6, RE, RC, RD, RB, RA); LK2(RD, RA, RC, RB, RE, 15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) S(S7, RD, RA, RC, RB, RE); LK2(RE, RC, RB, RD, RA, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) S(S0, RE, RC, RB, RD, RA); LK2(RB, RC, RD, RE, RA, 17);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) S(S1, RB, RC, RD, RE, RA); LK2(RA, RD, RE, RB, RC, 18);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) S(S2, RA, RD, RE, RB, RC); LK2(RC, RD, RA, RB, RE, 19);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) S(S3, RC, RD, RA, RB, RE); LK2(RB, RE, RD, RC, RA, 20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) S(S4, RB, RE, RD, RC, RA); LK2(RE, RD, RC, RA, RB, 21);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) S(S5, RE, RD, RC, RA, RB); LK2(RB, RE, RD, RA, RC, 22);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) S(S6, RB, RE, RD, RA, RC); LK2(RD, RC, RE, RA, RB, 23);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) S(S7, RD, RC, RE, RA, RB); LK2(RB, RE, RA, RD, RC, 24);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) S(S0, RB, RE, RA, RD, RC); LK2(RA, RE, RD, RB, RC, 25);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) S(S1, RA, RE, RD, RB, RC); LK2(RC, RD, RB, RA, RE, 26);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) S(S2, RC, RD, RB, RA, RE); LK2(RE, RD, RC, RA, RB, 27);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) S(S3, RE, RD, RC, RA, RB); LK2(RA, RB, RD, RE, RC, 28);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) S(S4, RA, RB, RD, RE, RC); LK2(RB, RD, RE, RC, RA, 29);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) S(S5, RB, RD, RE, RC, RA); LK2(RA, RB, RD, RC, RE, 30);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) S(S6, RA, RB, RD, RC, RE); LK2(RD, RE, RB, RC, RA, 31);
/* Last round: plain key mixing (key 32) instead of LK2. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) S(S7, RD, RE, RB, RC, RA); K2(RA, RB, RC, RD, RE, 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610)
/* Back to block layout; results are in the RA, RB, RC, RD pairs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) SYM_FUNC_END(__serpent_enc_blk16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616)
/*
 * __serpent_dec_blk16: file-local core that decrypts the blocks held in the
 * RA..RD register pairs.
 *
 * Structure: transpose in, K2 with key 32, then the inverse S-boxes
 * SI7..SI0 cycled four times with key indices counting down from 31.
 * Rounds 31..1 use SP() (inverse S-box plus get_key() loads for that round)
 * followed by KL2(); the last round uses plain S() followed by K2 with
 * key 0.  K2 is defined outside this chunk.
 *
 * As in the encryption routine, register-name arguments are permuted per
 * round rather than moving data.  Note that the result ends up in the
 * RC, RD, RB, RE pairs, NOT in RA..RD; callers must store from those.
 *
 * Clobbers: RNOT, RK0-RK3 and the RA pair in addition to the output
 * registers.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) .align 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) /* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) * %rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) * RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) * RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: plaintext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625)
/* RNOT = all ones (a register always compares equal to itself). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) vpcmpeqd RNOT, RNOT, RNOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627)
/* RK0-RK2 are free at this point and serve as transpose scratch. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630)
/* Key 32 first, then rounds 31..1; SP() and KL2() share the same key index. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) K2(RA, RB, RC, RD, RE, 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) SP(SI7, RA, RB, RC, RD, RE, 31); KL2(RB, RD, RA, RE, RC, 31);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) SP(SI6, RB, RD, RA, RE, RC, 30); KL2(RA, RC, RE, RB, RD, 30);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) SP(SI5, RA, RC, RE, RB, RD, 29); KL2(RC, RD, RA, RE, RB, 29);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) SP(SI4, RC, RD, RA, RE, RB, 28); KL2(RC, RA, RB, RE, RD, 28);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) SP(SI3, RC, RA, RB, RE, RD, 27); KL2(RB, RC, RD, RE, RA, 27);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) SP(SI2, RB, RC, RD, RE, RA, 26); KL2(RC, RA, RE, RD, RB, 26);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) SP(SI1, RC, RA, RE, RD, RB, 25); KL2(RB, RA, RE, RD, RC, 25);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) SP(SI0, RB, RA, RE, RD, RC, 24); KL2(RE, RC, RA, RB, RD, 24);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) SP(SI7, RE, RC, RA, RB, RD, 23); KL2(RC, RB, RE, RD, RA, 23);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) SP(SI6, RC, RB, RE, RD, RA, 22); KL2(RE, RA, RD, RC, RB, 22);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) SP(SI5, RE, RA, RD, RC, RB, 21); KL2(RA, RB, RE, RD, RC, 21);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) SP(SI4, RA, RB, RE, RD, RC, 20); KL2(RA, RE, RC, RD, RB, 20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) SP(SI3, RA, RE, RC, RD, RB, 19); KL2(RC, RA, RB, RD, RE, 19);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) SP(SI2, RC, RA, RB, RD, RE, 18); KL2(RA, RE, RD, RB, RC, 18);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) SP(SI1, RA, RE, RD, RB, RC, 17); KL2(RC, RE, RD, RB, RA, 17);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) SP(SI0, RC, RE, RD, RB, RA, 16); KL2(RD, RA, RE, RC, RB, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) SP(SI7, RD, RA, RE, RC, RB, 15); KL2(RA, RC, RD, RB, RE, 15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) SP(SI6, RA, RC, RD, RB, RE, 14); KL2(RD, RE, RB, RA, RC, 14);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) SP(SI5, RD, RE, RB, RA, RC, 13); KL2(RE, RC, RD, RB, RA, 13);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) SP(SI4, RE, RC, RD, RB, RA, 12); KL2(RE, RD, RA, RB, RC, 12);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) SP(SI3, RE, RD, RA, RB, RC, 11); KL2(RA, RE, RC, RB, RD, 11);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) SP(SI2, RA, RE, RC, RB, RD, 10); KL2(RE, RD, RB, RC, RA, 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) SP(SI1, RE, RD, RB, RC, RA, 9); KL2(RA, RD, RB, RC, RE, 9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) SP(SI0, RA, RD, RB, RC, RE, 8); KL2(RB, RE, RD, RA, RC, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) SP(SI7, RB, RE, RD, RA, RC, 7); KL2(RE, RA, RB, RC, RD, 7);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) SP(SI6, RE, RA, RB, RC, RD, 6); KL2(RB, RD, RC, RE, RA, 6);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) SP(SI5, RB, RD, RC, RE, RA, 5); KL2(RD, RA, RB, RC, RE, 5);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) SP(SI4, RD, RA, RB, RC, RE, 4); KL2(RD, RB, RE, RC, RA, 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) SP(SI3, RD, RB, RE, RC, RA, 3); KL2(RE, RD, RA, RC, RB, 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) SP(SI2, RE, RD, RA, RC, RB, 2); KL2(RD, RB, RC, RA, RE, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) SP(SI1, RD, RB, RC, RA, RE, 1); KL2(RE, RB, RC, RA, RD, 1);
/* Last round: S() without key prefetch, then plain key mixing with key 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) S(SI0, RE, RB, RC, RA, RD); K2(RC, RD, RB, RE, RA, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
/* Back to block layout; results are in the RC, RD, RB, RE pairs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) SYM_FUNC_END(__serpent_dec_blk16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670)
/*
 * serpent_ecb_enc_16way: exported ECB encryption entry point.
 *
 * Loads the input from src into the RA..RD register pairs, runs
 * __serpent_enc_blk16 and stores the same registers to dst.
 * load_16way()/store_16way() are not defined in this chunk (presumably they
 * come from glue_helper-asm-avx2.S, which this file includes).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) SYM_FUNC_START(serpent_ecb_enc_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) /* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) * %rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) * %rsi: dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) * %rdx: src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678)
/* vzeroupper brackets the AVX2 section on entry and on exit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) call __serpent_enc_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684)
/* The encryption core leaves its result in the same RA..RD registers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) SYM_FUNC_END(serpent_ecb_enc_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692)
/*
 * serpent_ecb_dec_16way: exported ECB decryption entry point.
 *
 * Loads the input from src into the RA..RD register pairs, runs
 * __serpent_dec_blk16 and stores the result to dst.
 * load_16way()/store_16way() are not defined in this chunk (presumably they
 * come from glue_helper-asm-avx2.S, which this file includes).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) SYM_FUNC_START(serpent_ecb_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) /* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) * %rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) * %rsi: dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) * %rdx: src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700)
/* vzeroupper brackets the AVX2 section on entry and on exit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) call __serpent_dec_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706)
/* The decryption core returns its result in RC, RD, RB, RE -- not RA..RD. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) SYM_FUNC_END(serpent_ecb_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714)
/*
 * serpent_cbc_dec_16way: exported CBC decryption entry point.
 *
 * Same flow as the ECB decryption wrapper, except that the final store goes
 * through store_cbc_16way(), which is handed src as well as dst plus RK0 as
 * a scratch register.  NOTE(review): presumably it XORs each decrypted
 * block with the preceding ciphertext block read from src -- confirm in the
 * helper's definition, which is outside this chunk.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) SYM_FUNC_START(serpent_cbc_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) /* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) * %rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) * %rsi: dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) * %rdx: src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722)
/* vzeroupper brackets the AVX2 section on entry and on exit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) call __serpent_dec_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728)
/* The decryption core returns its result in RC, RD, RB, RE -- not RA..RD. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) store_cbc_16way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) RK0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) SYM_FUNC_END(serpent_cbc_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737)
/*
 * serpent_ctr_16way: exported CTR-mode entry point.
 *
 * load_ctr_16way() fills the RA..RD register pairs from the IV at %rcx,
 * using .Lbswap128_mask and the RK*, RNOT and tp registers as temporaries.
 * Those registers are then ENcrypted (CTR uses the encryption core in both
 * directions) and passed to store_ctr_16way() together with src and dst.
 * NOTE(review): presumably the load helper generates 16 consecutive counter
 * blocks and the store helper XORs the keystream with src into dst; both
 * helpers are defined outside this chunk -- confirm there, including
 * whether the IV at %rcx is updated.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) SYM_FUNC_START(serpent_ctr_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) /* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) * %rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) * %rsi: dst (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) * %rdx: src (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) * %rcx: iv (little endian, 128bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746)
/* vzeroupper brackets the AVX2 section on entry and on exit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748)
/* RK0-RK3 and RNOT are only scratch here; the core reloads them itself. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) call __serpent_enc_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) SYM_FUNC_END(serpent_ctr_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762)
/*
 * serpent_xts_enc_16way: exported XTS encryption entry point.
 *
 * load_xts_16way() is given the IV, src AND dst, the two
 * .Lxts_gf128mul_and_shl1_mask_* constants and the RK*, RNOT registers as
 * temporaries; it fills the RA..RD register pairs.  After the encryption
 * core runs, store_xts_16way() is given only dst.
 * NOTE(review): this suggests the per-block tweaks are staged in the dst
 * buffer by the load helper and XORed back in by the store helper -- both
 * are defined outside this chunk, confirm there.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) SYM_FUNC_START(serpent_xts_enc_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) /* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) * %rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) * %rsi: dst (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) * %rdx: src (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771)
/* vzeroupper brackets the AVX2 section on entry and on exit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773)
/* RK0-RK3 and RNOT are only scratch here; the core reloads them itself. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) .Lxts_gf128mul_and_shl1_mask_0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) .Lxts_gf128mul_and_shl1_mask_1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) call __serpent_enc_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) SYM_FUNC_END(serpent_xts_enc_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788)
/*
 * serpent_xts_dec_16way: exported XTS decryption entry point.
 *
 * Mirrors the XTS encryption wrapper: identical load_xts_16way() call, but
 * the decryption core is invoked and the store therefore reads the
 * RC, RD, RB, RE register pairs.
 * NOTE(review): load_xts_16way()/store_xts_16way() are defined outside this
 * chunk; that the tweaks travel through the dst buffer between them is an
 * assumption based on the argument lists -- confirm there.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) SYM_FUNC_START(serpent_xts_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) /* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) * %rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) * %rsi: dst (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) * %rdx: src (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) * %rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797)
/* vzeroupper brackets the AVX2 section on entry and on exit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799)
/* RK0-RK3 and RNOT are only scratch here; the core reloads them itself. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) .Lxts_gf128mul_and_shl1_mask_0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) .Lxts_gf128mul_and_shl1_mask_1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) call __serpent_dec_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806)
/* The decryption core returns its result in RC, RD, RB, RE -- not RA..RD. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) SYM_FUNC_END(serpent_xts_dec_16way)