Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * x86_64/AVX2 assembler optimized version of Serpent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  * Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  * Based on AVX assembler implementation of Serpent by:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  *  Copyright © 2012 Johannes Goetzfried
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  *      <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13) #include <asm/frame.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14) #include "glue_helper-asm-avx2.S"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16) .file "serpent-avx2-asm_64.S"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17) 
/*
 * Read-only 16-byte constants.  Each lives in its own mergeable section
 * (flags "aM", entity size 16) and is 16-byte aligned.  None of them is
 * referenced in the part of the file visible here.
 *
 * .Lbswap128_mask: the byte values 15 down to 0 -- presumably a byte-shuffle
 * control that reverses the 16 bytes of a 128-bit value (confirm at the use
 * site).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) .section	.rodata.cst16.bswap128_mask, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20) .Lbswap128_mask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21) 	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22) 
/*
 * .Lxts_gf128mul_and_shl1_mask_0: two little-endian 64-bit words, 0x87 and 1.
 * NOTE(review): going by the label this is a mask for the XTS tweak
 * multiplication in GF(2^128) -- confirm against the macro that loads it.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) .section	.rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) .Lxts_gf128mul_and_shl1_mask_0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 	.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 
/*
 * .Lxts_gf128mul_and_shl1_mask_1: two little-endian 64-bit words, 0x10e and 2,
 * i.e. each word of mask_0 shifted left by one bit.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) .section	.rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) .Lxts_gf128mul_and_shl1_mask_1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) 	.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) 
/*
 * Register aliases used by the macros below.
 *
 * CTX   base address from which get_key() reads 32-bit round-key words.
 *       %rdi is the first integer argument register in the SysV AMD64 ABI;
 *       presumably the callers pass the cipher context there (confirm).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) #define CTX %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 
/*
 * RNOT  XORed into operands by the S-box macros where a bitwise NOT is
 *       needed; this only works if it holds all ones.  It is initialised
 *       outside the part of the file visible here (confirm).
 * tp    scratch register of the S-box macros; in some S-boxes it carries a
 *       value from the _1 half to the _2 half.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) #define RNOT %ymm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) #define tp  %ymm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 
/*
 * Two parallel sets of five data registers.  K2() and LK2() receive the
 * prefix only (e.g. RA) and paste the suffix 1 or 2 onto it (x0 ## 1,
 * x0 ## 2), so every step is applied to both sets.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) #define RA1 %ymm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) #define RA2 %ymm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) #define RB1 %ymm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) #define RB2 %ymm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) #define RC1 %ymm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) #define RC2 %ymm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) #define RD1 %ymm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) #define RD2 %ymm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) #define RE1 %ymm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) #define RE2 %ymm11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 
/* Round-key registers: K2() and LK2() broadcast one key word into each. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) #define RK0 %ymm12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) #define RK1 %ymm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) #define RK2 %ymm14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) #define RK3 %ymm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 
/*
 * 128-bit (xmm) views of the low halves of RK0..RK3.  Not used by any macro
 * in the part of the file visible here.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) #define RK0x %xmm12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) #define RK1x %xmm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) #define RK2x %xmm14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) #define RK3x %xmm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 
/*
 * S0_1 / S0_2: the two halves of the "S0" boolean network (presumably the
 * bitsliced Serpent S-box 0, judging by the macro name and the file header).
 * Only vpand/vpor/vpxor are used; AT&T operand order is "src2, src1, dst".
 *
 * S0_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to S0_2.  tp is pure scratch (written and consumed inside
 * this half).  XOR with RNOT is used as bitwise NOT, which assumes RNOT is
 * all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) #define S0_1(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	vpor		x0,   x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	vpxor		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	vpxor		x2,   x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 	vpxor		RNOT, x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	vpxor		x1,   tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) 	vpand		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 	vpxor		x4,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 	vpxor		x0,   x2, x2;
/* S0_2: continues on the x0..x4 values left by S0_1; does not touch tp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) #define S0_2(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	vpxor		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 	vpor		x0,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	vpand		x1,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 	vpxor		x2,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	vpxor		RNOT, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 	vpxor		x4,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) 	vpxor		x2,   x1, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 
/*
 * S1_1 / S1_2: the two halves of the "S1" boolean network (presumably the
 * bitsliced Serpent S-box 1, judging by the macro name and the file header).
 * Only vpand/vpor/vpxor are used; AT&T operand order is "src2, src1, dst".
 *
 * S1_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to S1_2.  tp is pure scratch (written and consumed inside
 * this half).  XOR with RNOT is used as bitwise NOT, which assumes RNOT is
 * all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) #define S1_1(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 	vpxor		x0,   x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	vpxor		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 	vpxor		RNOT, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) 	vpand		tp,   x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 	vpor		tp,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 	vpxor		x2,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 	vpxor		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 	vpxor		x3,   tp, x1;
/* S1_2: continues on the x0..x4 values left by S1_1; does not touch tp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) #define S1_2(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 	vpxor		x4,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	vpor		x4,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 	vpxor		x2,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 	vpand		x0,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 	vpxor		x1,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 	vpor		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 	vpxor		RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	vpxor		x1,   x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 
/*
 * S2_1 / S2_2: the two halves of the "S2" boolean network (presumably the
 * bitsliced Serpent S-box 2, judging by the macro name and the file header).
 * Only vpand/vpor/vpxor are used; AT&T operand order is "src2, src1, dst".
 *
 * S2_1: works on x0..x3 only; x4 is not touched.  tp is written here and
 * is still live on exit: S2_2 reads it in its first instruction, so nothing
 * may modify tp between the two halves.  XOR with RNOT is used as bitwise
 * NOT, which assumes RNOT is all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) #define S2_1(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 	vpxor		RNOT, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	vpxor		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	vpand		x2,   x0, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 	vpxor		x3,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 	vpor		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 	vpxor		x1,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 	vpxor		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	vpand		tp,   x1, x1;
/*
 * S2_2: consumes the tp value produced by S2_1.  x4 is first written here;
 * its incoming value is never read.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) #define S2_2(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 	vpxor		x2,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 	vpand		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	vpor		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	vpxor		RNOT, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	vpxor		tp,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 	vpxor		tp,   x0, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 	vpxor		x2,   tp, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 	vpor		x2,   x1, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 
/*
 * S3_1 / S3_2: the two halves of the "S3" boolean network (presumably the
 * bitsliced Serpent S-box 3, judging by the macro name and the file header).
 * Only vpand/vpor/vpxor are used; AT&T operand order is "src2, src1, dst".
 * Neither half uses RNOT.
 *
 * S3_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to S3_2.  tp is pure scratch (written and consumed inside
 * this half).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) #define S3_1(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	vpxor		x3,   x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 	vpor		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	vpand		x0,   x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 	vpxor		tp,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	vpand		x3,   tp, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 	vpor		x4,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 	vpxor		x3,   x4, x4;
/* S3_2: continues on the x0..x4 values left by S3_1; does not touch tp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) #define S3_2(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 	vpxor		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 	vpand		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 	vpand		x4,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 	vpxor		x2,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 	vpor		x1,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 	vpand		x1,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	vpxor		x3,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 	vpxor		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) 	vpxor		x2,   x3, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 
/*
 * S4_1 / S4_2: the two halves of the "S4" boolean network (presumably the
 * bitsliced Serpent S-box 4, judging by the macro name and the file header).
 * Only vpand/vpor/vpxor are used; AT&T operand order is "src2, src1, dst".
 *
 * S4_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to S4_2.  tp is written here and is still live on exit:
 * S4_2 reads it, so nothing may modify tp between the two halves.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) #define S4_1(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 	vpand		x0,   x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 	vpxor		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 	vpxor		x2,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 	vpor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) 	vpxor		x1,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	vpxor		tp,   x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 	vpor		x0,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 	vpxor		x1,   x2, x2;
/*
 * S4_2: consumes the tp value produced by S4_1 (read twice, never written).
 * XOR with RNOT is used as bitwise NOT, which assumes RNOT is all ones (it
 * is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) #define S4_2(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 	vpand		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 	vpxor		x4,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 	vpand		x2,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 	vpxor		tp,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 	vpxor		x0,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 	vpor		x1,   tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) 	vpxor		RNOT, x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 	vpxor		x0,   x3, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 
/*
 * S5_1 / S5_2: the two halves of the "S5" boolean network (presumably the
 * bitsliced Serpent S-box 5, judging by the macro name and the file header).
 * Only vpand/vpor/vpxor are used; AT&T operand order is "src2, src1, dst".
 *
 * S5_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to S5_2.  tp is pure scratch (written and consumed inside
 * this half).  XOR with RNOT is used as bitwise NOT, which assumes RNOT is
 * all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) #define S5_1(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 	vpor		x0,   x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 	vpxor		tp,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 	vpxor		RNOT, x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 	vpxor		x0,   x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 	vpand		x4,   tp, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	vpor		x3,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 	vpxor		x0,   x4, x4;
/*
 * S5_2: continues on the x0..x4 values left by S5_1; x4 is only read here.
 * Does not touch tp or RNOT.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) #define S5_2(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 	vpand		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 	vpxor		x3,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 	vpxor		x2,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 	vpxor		x1,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 	vpand		x4,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 	vpxor		x2,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	vpand		x0,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 	vpxor		x2,   x3, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 
/*
 * S6_1 / S6_2: the two halves of the "S6" boolean network (presumably the
 * bitsliced Serpent S-box 6, judging by the macro name and the file header).
 * Only vpand/vpor/vpxor are used; AT&T operand order is "src2, src1, dst".
 *
 * S6_1: the incoming value of x4 is never read; x4 is first written here
 * (as x1 XOR RNOT) and handed on to S6_2.  tp is pure scratch (written and
 * consumed inside this half).  XOR with RNOT is used as bitwise NOT, which
 * assumes RNOT is all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) #define S6_1(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 	vpxor		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 	vpxor		x2,   x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) 	vpxor		x0,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 	vpand		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 	vpor		x3,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 	vpxor		RNOT, x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 	vpxor		tp,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 	vpxor		x2,   tp, x1;
/* S6_2: continues on the x0..x4 values left by S6_1; does not touch tp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) #define S6_2(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 	vpxor		x4,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 	vpxor		x0,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	vpand		x0,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 	vpxor		x1,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 	vpand		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 	vpxor		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 	vpxor		x2,   x1, x1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 
/*
 * S7_1 / S7_2: the two halves of the "S7" boolean network (presumably the
 * bitsliced Serpent S-box 7, judging by the macro name and the file header).
 * Only vpand/vpor/vpxor are used; AT&T operand order is "src2, src1, dst".
 *
 * S7_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to S7_2.  tp is pure scratch (written and consumed inside
 * this half).  XOR with RNOT is used as bitwise NOT, which assumes RNOT is
 * all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) #define S7_1(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 	vpxor		RNOT, x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) 	vpxor		RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 	vpand		x2,   tp, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 	vpxor		x3,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 	vpor		tp,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 	vpxor		x2,   tp, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) 	vpxor		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 	vpor		x1,   x0, x0;
/* S7_2: continues on the x0..x4 values left by S7_1; does not touch tp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) #define S7_2(x0, x1, x2, x3, x4)      \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 	vpand		x0,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) 	vpxor		x4,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 	vpxor		x3,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	vpand		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 	vpxor		x1,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	vpxor		x4,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 	vpxor		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 	vpor		x0,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 	vpxor		x1,   x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 
/*
 * SI0_1 / SI0_2: the two halves of the "SI0" boolean network (presumably
 * the inverse of Serpent S-box 0 in bitsliced form, judging by the macro
 * name and the file header).  Only vpand/vpor/vpxor are used; AT&T operand
 * order is "src2, src1, dst".
 *
 * SI0_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to SI0_2.  tp is pure scratch (written and consumed inside
 * this half).  XOR with RNOT is used as bitwise NOT, which assumes RNOT is
 * all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) #define SI0_1(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 	vpxor		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 	vpor		x1,   x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) 	vpxor		x1,   x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 	vpxor		RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 	vpxor		tp,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 	vpxor		x0,   tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 	vpand		x1,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 	vpxor		x2,   x0, x0;
/* SI0_2: continues on the x0..x4 values left by SI0_1; does not touch tp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) #define SI0_2(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 	vpand		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 	vpxor		x4,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 	vpxor		x3,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 	vpand		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 	vpxor		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 	vpxor		x3,   x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 
/*
 * SI1_1 / SI1_2: the two halves of the "SI1" boolean network (presumably
 * the inverse of Serpent S-box 1 in bitsliced form, judging by the macro
 * name and the file header).  Only vpand/vpor/vpxor are used; AT&T operand
 * order is "src2, src1, dst".
 *
 * SI1_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to SI1_2.  tp is written here and is still live on exit:
 * SI1_2 reads it, so nothing may modify tp between the two halves.  XOR
 * with RNOT is used as bitwise NOT, which assumes RNOT is all ones (it is
 * set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) #define SI1_1(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 	vpxor		x3,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) 	vpxor		x2,   x0, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 	vpxor		RNOT, x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 	vpor		x1,   x0, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 	vpxor		x3,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	vpand		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 	vpxor		x2,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	vpand		x4,   x2, x2;
/*
 * SI1_2: consumes the tp value produced by SI1_1 (read only).  The incoming
 * x0 is not read in this half; x0 is overwritten with tp | x4.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) #define SI1_2(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 	vpxor		x1,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) 	vpor		x3,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) 	vpxor		tp,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 	vpxor		tp,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) 	vpor		x4,   tp, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) 	vpxor		x4,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) 	vpxor		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) 	vpxor		x1,   x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) 
/*
 * SI2_1 / SI2_2: the two halves of the "SI2" boolean network (presumably
 * the inverse of Serpent S-box 2 in bitsliced form, judging by the macro
 * name and the file header).  Only vpand/vpor/vpxor are used; AT&T operand
 * order is "src2, src1, dst".
 *
 * SI2_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to SI2_2.  tp is pure scratch (written and consumed inside
 * this half).  XOR with RNOT is used as bitwise NOT, which assumes RNOT is
 * all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) #define SI2_1(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) 	vpxor		x1,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) 	vpxor		RNOT, x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) 	vpor		x2,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) 	vpxor		x0,   x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) 	vpxor		x1,   tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) 	vpor		x2,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) 	vpxor		x0,   x2, x2;
/*
 * SI2_2: continues on the x0..x4 values left by SI2_1; x0 is only read
 * here.  Does not touch tp.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) #define SI2_2(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) 	vpxor		x4,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) 	vpor		x3,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) 	vpxor		x2,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 	vpand		x1,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) 	vpxor		x4,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) 	vpxor		x0,   x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) 
/*
 * SI3_1 / SI3_2: the two halves of the "SI3" boolean network (presumably
 * the inverse of Serpent S-box 3 in bitsliced form, judging by the macro
 * name and the file header).  Only vpand/vpor/vpxor are used; AT&T operand
 * order is "src2, src1, dst".  Neither half uses RNOT.
 *
 * SI3_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to SI3_2.  tp is pure scratch (written and consumed inside
 * this half).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) #define SI3_1(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 	vpxor		x1,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) 	vpand		x2,   x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 	vpxor		x0,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) 	vpor		x1,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) 	vpxor		x3,   x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) 	vpxor		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) 	vpor		tp,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 	vpxor		x2,   tp, x1;
/* SI3_2: continues on the x0..x4 values left by SI3_1; does not touch tp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) #define SI3_2(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) 	vpxor		x3,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 	vpand		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) 	vpxor		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 	vpand		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 	vpxor		x3,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 	vpxor		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) 	vpxor		x1,   x0, x0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) 
/*
 * SI4_1 / SI4_2: the two halves of the "SI4" boolean network (presumably
 * the inverse of Serpent S-box 4 in bitsliced form, judging by the macro
 * name and the file header).  Only vpand/vpor/vpxor are used; AT&T operand
 * order is "src2, src1, dst".
 *
 * SI4_1: the incoming value of x4 is never read; x4 is first written here
 * (as x0 XOR RNOT) and handed on to SI4_2.  tp is pure scratch (written and
 * consumed inside this half).  XOR with RNOT is used as bitwise NOT, which
 * assumes RNOT is all ones (it is set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) #define SI4_1(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 	vpand		x1,   x0, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 	vpxor		x2,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) 	vpor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) 	vpxor		RNOT, x0, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) 	vpxor		tp,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 	vpxor		x2,   tp, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) 	vpand		x4,   x2, x2;
/* SI4_2: continues on the x0..x4 values left by SI4_1; does not touch tp. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) #define SI4_2(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 	vpxor		x0,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) 	vpor		x4,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) 	vpxor		x3,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) 	vpand		x2,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) 	vpxor		x3,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) 	vpxor		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) 	vpand		x0,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) 	vpxor		x1,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) 	vpxor		x3,   x0, x0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 
/*
 * SI5_1 / SI5_2: the two halves of the "SI5" boolean network (presumably
 * the inverse of Serpent S-box 5 in bitsliced form, judging by the macro
 * name and the file header).  Only vpand/vpor/vpxor are used; AT&T operand
 * order is "src2, src1, dst".
 *
 * SI5_1: works on x0..x3 only; x4 is not touched.  tp is written here and
 * is still live on exit: SI5_2 reads it in its first instruction, so
 * nothing may modify tp between the two halves.  XOR with RNOT is used as
 * bitwise NOT, which assumes RNOT is all ones (it is set up outside this
 * view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) #define SI5_1(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 	vpor		x2,   x1, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) 	vpxor		x1,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) 	vpxor		x3,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) 	vpand		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) 	vpor		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 	vpxor		RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) 	vpxor		x2,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) 	vpor		x0,   x2, x2;
/*
 * SI5_2: consumes the tp value produced by SI5_1 (read only).  x4 is first
 * written here; its incoming value is never read.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) #define SI5_2(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) 	vpxor		tp,   x1, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) 	vpxor		x4,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) 	vpand		x0,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) 	vpxor		tp,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) 	vpxor		x3,   tp, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 	vpand		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 	vpxor		x4,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) 	vpxor		x3,   x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) 
/*
 * SI6_1 / SI6_2: the two halves of the "SI6" boolean network (presumably
 * the inverse of Serpent S-box 6 in bitsliced form, judging by the macro
 * name and the file header).  Only vpand/vpor/vpxor are used; AT&T operand
 * order is "src2, src1, dst".
 *
 * SI6_1: works on x0..x3 only; x4 is not touched.  tp is written here and
 * is still live on exit: SI6_2 reads it in its first instruction, so
 * nothing may modify tp between the two halves.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) #define SI6_1(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) 	vpand		x3,   x0, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 	vpxor		x2,   tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) 	vpxor		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) 	vpor		x0,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) 	vpxor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 	vpand		tp,   x3, x3;
/*
 * SI6_2: starts by complementing the tp value produced by SI6_1 (XOR with
 * RNOT, which assumes RNOT is all ones; it is set up outside this view).
 * x4 is first written here; its incoming value is never read.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) #define SI6_2(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) 	vpxor		RNOT, tp, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) 	vpxor		x1,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 	vpand		x2,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) 	vpxor		tp,   x0, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 	vpxor		x4,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) 	vpxor		x2,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 	vpxor		x1,   tp, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) 	vpxor		x0,   x2, x2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) 
/*
 * SI7_1 / SI7_2: the two halves of the "SI7" boolean network (presumably
 * the inverse of Serpent S-box 7 in bitsliced form, judging by the macro
 * name and the file header).  Only vpand/vpor/vpxor are used; AT&T operand
 * order is "src2, src1, dst".
 *
 * SI7_1: the incoming value of x4 is never read; x4 is first written here
 * and handed on to SI7_2.  tp is written here and is still live on exit:
 * SI7_2 reads it, so nothing may modify tp between the two halves.  XOR
 * with RNOT is used as bitwise NOT, which assumes RNOT is all ones (it is
 * set up outside this view).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) #define SI7_1(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 	vpand		x0,   x3, tp; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) 	vpxor		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) 	vpor		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) 	vpxor		x1,   x3, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) 	vpxor		RNOT, x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) 	vpor		tp,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 	vpxor		x0,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) 	vpand		x2,   x0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 	vpxor		x1,   x0, x0;
/*
 * SI7_2: consumes the tp value produced by SI7_1 (read once).  The incoming
 * x3 is not read in this half; x3 is overwritten with tp ^ x2.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) #define SI7_2(x0, x1, x2, x3, x4)     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) 	vpand		x2,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) 	vpxor		x2,   tp, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 	vpxor		x3,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) 	vpand		x3,   x2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) 	vpor		x0,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) 	vpxor		x4,   x1, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) 	vpxor		x4,   x3, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) 	vpand		x0,   x4, x4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) 	vpxor		x2,   x4, x4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) 
/*
 * get_key(i, j, t): load one 32-bit word from memory and broadcast it to
 * every 32-bit lane of register t (vpbroadcastd).
 *
 * The word is read at byte offset (4*i + j)*4 from CTX, i.e. CTX is treated
 * as an array of 32-bit words with four consecutive words per index i, and
 * j (0..3 in the visible callers) selects the word within that group.
 * i and j are parenthesised in the expansion, so expressions are safe.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) #define get_key(i,j,t) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 	vpbroadcastd (4*(i)+(j))*4(CTX), t;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) 
/*
 * K2(x0, x1, x2, x3, x4, i): XOR the four key words of group i into both
 * register sets.
 *
 * x0..x3 are register-name prefixes (e.g. RA); the suffixes 1 and 2 are
 * pasted on to address the two parallel sets.  The sequence is:
 *   RKj        = broadcast of key word (i, j), for j = 0..3
 *   xj ## 1   ^= RKj
 *   xj ## 2   ^= RKj
 * x4 is accepted for signature symmetry with the other macros but is not
 * used.  Clobbers RK0..RK3.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) #define K2(x0, x1, x2, x3, x4, i) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) 	get_key(i, 0, RK0); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) 	get_key(i, 1, RK1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) 	get_key(i, 2, RK2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) 	get_key(i, 3, RK3); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) 	vpxor RK0,	x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 	vpxor RK1,	x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) 	vpxor RK2,	x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) 	vpxor RK3,	x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) 		vpxor RK0,	x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 		vpxor RK1,	x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) 		vpxor RK2,	x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) 		vpxor RK3,	x3 ## 2, x3 ## 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) 
/*
 * LK2(x0, x1, x2, x3, x4, i): linear mixing of x0..x3 followed by an XOR
 * with the four key words of group i, applied to both register sets
 * (suffix 1 and suffix 2; set-2 instructions carry one extra tab of
 * indentation).  This appears to be Serpent's linear transformation plus
 * round-key mixing -- confirm against the cipher specification.
 *
 * Per 32-bit lane, with rol() built from vpslld + vpsrld + vpor and x4 as
 * the scratch register for the shifted copy:
 *   x0  = rol(x0, 13)
 *   x2  = rol(x2, 3)
 *   x1 ^= x0 ^ x2
 *   x1  = rol(x1, 1)
 *   x3 ^= x2 ^ (x0 << 3)
 *   x3  = rol(x3, 7)
 *   x0 ^= x1 ^ x3
 *   x2 ^= x3 ^ (x1 << 7)
 *   x1 ^= RK1;  x3 ^= RK3
 *   x0  = rol(x0, 5)
 *   x2  = rol(x2, 22)
 *   x0 ^= RK0;  x2 ^= RK2
 *
 * The four get_key() loads are placed between the arithmetic groups, in the
 * order RK1, RK3, RK0, RK2; all of them complete before the first key XOR.
 * Clobbers x4 ## 1, x4 ## 2 and RK0..RK3.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) #define LK2(x0, x1, x2, x3, x4, i) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 	vpslld $13,		x0 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) 	vpsrld $(32 - 13),	x0 ## 1, x0 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) 	vpor			x4 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) 	vpxor			x0 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 	vpslld $3,		x2 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) 	vpsrld $(32 - 3),	x2 ## 1, x2 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 	vpor			x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 	vpxor			x2 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) 		vpslld $13,		x0 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) 		vpsrld $(32 - 13),	x0 ## 2, x0 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 		vpor			x4 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) 		vpxor			x0 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) 		vpslld $3,		x2 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) 		vpsrld $(32 - 3),	x2 ## 2, x2 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) 		vpor			x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) 		vpxor			x2 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) 	vpslld $1,		x1 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) 	vpsrld $(32 - 1),	x1 ## 1, x1 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) 	vpor			x4 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) 	vpslld $3,		x0 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 	vpxor			x2 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) 	vpxor			x4 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) 	get_key(i, 1, RK1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 		vpslld $1,		x1 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) 		vpsrld $(32 - 1),	x1 ## 2, x1 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) 		vpor			x4 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 		vpslld $3,		x0 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) 		vpxor			x2 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) 		vpxor			x4 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 		get_key(i, 3, RK3); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 	vpslld $7,		x3 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 	vpsrld $(32 - 7),	x3 ## 1, x3 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) 	vpor			x4 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) 	vpslld $7,		x1 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) 	vpxor			x1 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) 	vpxor			x3 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) 	vpxor			x3 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 	vpxor			x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 	get_key(i, 0, RK0); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) 		vpslld $7,		x3 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 		vpsrld $(32 - 7),	x3 ## 2, x3 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) 		vpor			x4 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) 		vpslld $7,		x1 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) 		vpxor			x1 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) 		vpxor			x3 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) 		vpxor			x3 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) 		vpxor			x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) 		get_key(i, 2, RK2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) 	vpxor			RK1, x1 ## 1, x1 ## 1;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) 	vpxor			RK3, x3 ## 1, x3 ## 1;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) 	vpslld $5,		x0 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) 	vpsrld $(32 - 5),	x0 ## 1, x0 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) 	vpor			x4 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) 	vpslld $22,		x2 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) 	vpsrld $(32 - 22),	x2 ## 1, x2 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) 	vpor			x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) 	vpxor			RK0, x0 ## 1, x0 ## 1;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) 	vpxor			RK2, x2 ## 1, x2 ## 1;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 		vpxor			RK1, x1 ## 2, x1 ## 2;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) 		vpxor			RK3, x3 ## 2, x3 ## 2;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) 		vpslld $5,		x0 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) 		vpsrld $(32 - 5),	x0 ## 2, x0 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) 		vpor			x4 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) 		vpslld $22,		x2 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) 		vpsrld $(32 - 22),	x2 ## 2, x2 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) 		vpor			x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) 		vpxor			RK0, x0 ## 2, x0 ## 2;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) 		vpxor			RK2, x2 ## 2, x2 ## 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) 
/*
 * KL2(x0, x1, x2, x3, x4, i)
 *
 * XORs the round-key registers RK0..RK3 into x0..x3 and then runs a chain of
 * rotate-right / shift / XOR steps.  Everything is done twice: once on the
 * registers named <arg>1 and once on the registers named <arg>2, with the
 * two instruction streams interleaved in groups.
 *
 * Per 32-bit element, for each of the two register sets (ror = rotate right,
 * built from vpsrld + vpslld + vpor with x4 as the temporary):
 *
 *	x0 ^= RK0;  x2 ^= RK2;  x0 = ror(x0, 5);
 *	x3 ^= RK3;  x1 ^= RK1;  x2 = ror(x2, 22);  x2 ^= x3;
 *	x0 ^= x3;   x2 ^= x1 << 7;  x0 ^= x1;  x1 = ror(x1, 1);
 *	x3 = ror(x3, 7);  x1 ^= x0;  x3 ^= x0 << 3;
 *	x0 = ror(x0, 13); x1 ^= x2;  x3 ^= x2;  x2 = ror(x2, 3);
 *
 * x4 is clobbered.  The parameter i is not referenced in the body: RK0..RK3
 * must already hold the wanted key words (in __serpent_dec_blk16 the
 * preceding SP() invocation issues the get_key() calls).
 *
 * NOTE(review): the rotate directions and amounts look like the inverse of
 * the mixing done at the end of LK2 -- confirm against the Serpent
 * specification before relying on that description.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) #define KL2(x0, x1, x2, x3, x4, i) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) 	vpxor			RK0, x0 ## 1, x0 ## 1;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) 	vpxor			RK2, x2 ## 1, x2 ## 1;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) 	vpsrld $5,		x0 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) 	vpslld $(32 - 5),	x0 ## 1, x0 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) 	vpor			x4 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) 	vpxor			RK3, x3 ## 1, x3 ## 1;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) 	vpxor			RK1, x1 ## 1, x1 ## 1;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) 	vpsrld $22,		x2 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) 	vpslld $(32 - 22),	x2 ## 1, x2 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) 	vpor			x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) 	vpxor			x3 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) 		vpxor			RK0, x0 ## 2, x0 ## 2;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) 		vpxor			RK2, x2 ## 2, x2 ## 2;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) 		vpsrld $5,		x0 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) 		vpslld $(32 - 5),	x0 ## 2, x0 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) 		vpor			x4 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) 		vpxor			RK3, x3 ## 2, x3 ## 2;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) 		vpxor			RK1, x1 ## 2, x1 ## 2;     \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) 		vpsrld $22,		x2 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) 		vpslld $(32 - 22),	x2 ## 2, x2 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) 		vpor			x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) 		vpxor			x3 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) 	vpxor			x3 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) 	vpslld $7,		x1 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) 	vpxor			x1 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) 	vpxor			x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) 	vpsrld $1,		x1 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) 	vpslld $(32 - 1),	x1 ## 1, x1 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) 	vpor			x4 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) 		vpxor			x3 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) 		vpslld $7,		x1 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) 		vpxor			x1 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) 		vpxor			x4 ## 2, x2 ## 2, x2 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) 		vpsrld $1,		x1 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) 		vpslld $(32 - 1),	x1 ## 2, x1 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) 		vpor			x4 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) 	vpsrld $7,		x3 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) 	vpslld $(32 - 7),	x3 ## 1, x3 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) 	vpor			x4 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) 	vpxor			x0 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) 	vpslld $3,		x0 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) 	vpxor			x4 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) 		vpsrld $7,		x3 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) 		vpslld $(32 - 7),	x3 ## 2, x3 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) 		vpor			x4 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) 		vpxor			x0 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) 		vpslld $3,		x0 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) 		vpxor			x4 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) 	vpsrld $13,		x0 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) 	vpslld $(32 - 13),	x0 ## 1, x0 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) 	vpor			x4 ## 1, x0 ## 1, x0 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) 	vpxor			x2 ## 1, x1 ## 1, x1 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) 	vpxor			x2 ## 1, x3 ## 1, x3 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) 	vpsrld $3,		x2 ## 1, x4 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) 	vpslld $(32 - 3),	x2 ## 1, x2 ## 1;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) 	vpor			x4 ## 1, x2 ## 1, x2 ## 1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) 		vpsrld $13,		x0 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) 		vpslld $(32 - 13),	x0 ## 2, x0 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) 		vpor			x4 ## 2, x0 ## 2, x0 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) 		vpxor			x2 ## 2, x1 ## 2, x1 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) 		vpxor			x2 ## 2, x3 ## 2, x3 ## 2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) 		vpsrld $3,		x2 ## 2, x4 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) 		vpslld $(32 - 3),	x2 ## 2, x2 ## 2;          \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) 		vpor			x4 ## 2, x2 ## 2, x2 ## 2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) 
/*
 * S(SBOX, x0, x1, x2, x3, x4)
 *
 * Expands the two halves of an S-box macro pair, SBOX_1 then SBOX_2, first
 * on the register set named <arg>1 and then on the set named <arg>2.
 * No round keys are loaded here (compare SP()).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) #define S(SBOX, x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) 	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) 	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) 	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) 	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) 
/*
 * SP(SBOX, x0, x1, x2, x3, x4, i)
 *
 * Same S-box expansion as S() (SBOX_1 then SBOX_2 on register set 1, then on
 * register set 2), but with one get_key() call placed in front of each of
 * the four S-box halves.  The calls fill RK0, RK2, RK3 and RK1, in that
 * order, for round i; KL2() consumes those registers afterwards.
 *
 * NOTE(review): get_key() is defined outside this part of the file; it
 * presumably loads one word of round key i into the given register, and the
 * interleaving presumably hides the load latency -- confirm.
 *
 * The last line deliberately has no trailing backslash, so the macro ends
 * here and does not depend on the following source line being blank.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) #define SP(SBOX, x0, x1, x2, x3, x4, i) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) 	get_key(i, 0, RK0); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) 	SBOX ## _1(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) 	get_key(i, 2, RK2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) 	SBOX ## _2(x0 ## 1, x1 ## 1, x2 ## 1, x3 ## 1, x4 ## 1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) 	get_key(i, 3, RK3); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) 	SBOX ## _1(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) 	get_key(i, 1, RK1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) 	SBOX ## _2(x0 ## 2, x1 ## 2, x2 ## 2, x3 ## 2, x4 ## 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) 
/*
 * transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
 *
 * Treats x0..x3 as the rows of a 4x4 matrix of 32-bit words and transposes
 * it in place.  The unpack instructions work independently on each 128-bit
 * lane, so every lane holds its own 4x4 matrix.
 *
 * Step 1 interleaves 32-bit words of row pairs (x0,x1) and (x2,x3);
 * step 2 interleaves the resulting 64-bit halves to form the columns.
 *
 * t0, t1 and t2 are clobbered.  x3 is reused as a fourth temporary after the
 * first step and only receives its final value in the last instruction.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) #define transpose_4x4(x0, x1, x2, x3, t0, t1, t2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) 	vpunpckldq		x1, x0, t0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) 	vpunpckhdq		x1, x0, t2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) 	vpunpckldq		x3, x2, t1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) 	vpunpckhdq		x3, x2, x3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) 	\
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) 	vpunpcklqdq		t1, t0, x0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) 	vpunpckhqdq		t1, t0, x1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) 	vpunpcklqdq		x3, t2, x2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) 	vpunpckhqdq		x3, t2, x3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) 
/*
 * read_blocks(x0, x1, x2, x3, t0, t1, t2)
 *
 * Alias for transpose_4x4(); used at the start of the block functions to
 * rearrange the freshly loaded registers.  t0..t2 are clobbered.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) #define read_blocks(x0, x1, x2, x3, t0, t1, t2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) 
/*
 * write_blocks(x0, x1, x2, x3, t0, t1, t2)
 *
 * Alias for transpose_4x4(); used at the end of the block functions to
 * rearrange the result registers before they are stored.  t0..t2 are
 * clobbered.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) #define write_blocks(x0, x1, x2, x3, t0, t1, t2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) 	transpose_4x4(x0, x1, x2, x3, t0, t1, t2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) 
/*
 * __serpent_enc_blk16: file-local encryption core shared by the ECB, CTR and
 * XTS encryption entry points below.
 *
 * Works entirely in registers: the caller loads the data into
 * RA1..RD1 / RA2..RD2 and stores the result from the same registers.
 * Structure: transpose, K2() with key index 0, then 32 S-box steps (S0..S7
 * repeated four times) each followed by LK2() with key index 1..31, and a
 * final K2() with key index 32, then transpose back.
 *
 * The register names passed to each S()/LK2() pair change from line to line;
 * the order must be kept exactly as written.
 * NOTE(review): presumably each line's permutation follows where the
 * previous S-box macro left its outputs -- confirm against the S-box macros.
 *
 * Clobbers RE1, RE2, RK0..RK3 (RK0..RK2 directly as transpose temporaries)
 * and RNOT.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) .align 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) SYM_FUNC_START_LOCAL(__serpent_enc_blk16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) 	/* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) 	 *	%rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: plaintext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) 	 * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) 
	/* comparing a register with itself sets every bit: RNOT = all-ones */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) 	vpcmpeqd RNOT, RNOT, RNOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) 
	/* RK0..RK2 are free here, so they serve as transpose temporaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) 	read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) 	read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) 
	/* left column: S-box step; right column: key/mixing step and key index */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) 						 K2(RA, RB, RC, RD, RE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) 	S(S0, RA, RB, RC, RD, RE);		LK2(RC, RB, RD, RA, RE, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) 	S(S1, RC, RB, RD, RA, RE);		LK2(RE, RD, RA, RC, RB, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) 	S(S2, RE, RD, RA, RC, RB);		LK2(RB, RD, RE, RC, RA, 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) 	S(S3, RB, RD, RE, RC, RA);		LK2(RC, RA, RD, RB, RE, 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) 	S(S4, RC, RA, RD, RB, RE);		LK2(RA, RD, RB, RE, RC, 5);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) 	S(S5, RA, RD, RB, RE, RC);		LK2(RC, RA, RD, RE, RB, 6);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) 	S(S6, RC, RA, RD, RE, RB);		LK2(RD, RB, RA, RE, RC, 7);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) 	S(S7, RD, RB, RA, RE, RC);		LK2(RC, RA, RE, RD, RB, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) 	S(S0, RC, RA, RE, RD, RB);		LK2(RE, RA, RD, RC, RB, 9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) 	S(S1, RE, RA, RD, RC, RB);		LK2(RB, RD, RC, RE, RA, 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) 	S(S2, RB, RD, RC, RE, RA);		LK2(RA, RD, RB, RE, RC, 11);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) 	S(S3, RA, RD, RB, RE, RC);		LK2(RE, RC, RD, RA, RB, 12);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) 	S(S4, RE, RC, RD, RA, RB);		LK2(RC, RD, RA, RB, RE, 13);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) 	S(S5, RC, RD, RA, RB, RE);		LK2(RE, RC, RD, RB, RA, 14);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) 	S(S6, RE, RC, RD, RB, RA);		LK2(RD, RA, RC, RB, RE, 15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) 	S(S7, RD, RA, RC, RB, RE);		LK2(RE, RC, RB, RD, RA, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) 	S(S0, RE, RC, RB, RD, RA);		LK2(RB, RC, RD, RE, RA, 17);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) 	S(S1, RB, RC, RD, RE, RA);		LK2(RA, RD, RE, RB, RC, 18);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) 	S(S2, RA, RD, RE, RB, RC);		LK2(RC, RD, RA, RB, RE, 19);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) 	S(S3, RC, RD, RA, RB, RE);		LK2(RB, RE, RD, RC, RA, 20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) 	S(S4, RB, RE, RD, RC, RA);		LK2(RE, RD, RC, RA, RB, 21);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) 	S(S5, RE, RD, RC, RA, RB);		LK2(RB, RE, RD, RA, RC, 22);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) 	S(S6, RB, RE, RD, RA, RC);		LK2(RD, RC, RE, RA, RB, 23);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) 	S(S7, RD, RC, RE, RA, RB);		LK2(RB, RE, RA, RD, RC, 24);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) 	S(S0, RB, RE, RA, RD, RC);		LK2(RA, RE, RD, RB, RC, 25);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) 	S(S1, RA, RE, RD, RB, RC);		LK2(RC, RD, RB, RA, RE, 26);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) 	S(S2, RC, RD, RB, RA, RE);		LK2(RE, RD, RC, RA, RB, 27);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) 	S(S3, RE, RD, RC, RA, RB);		LK2(RA, RB, RD, RE, RC, 28);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) 	S(S4, RA, RB, RD, RE, RC);		LK2(RB, RD, RE, RC, RA, 29);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) 	S(S5, RB, RD, RE, RC, RA);		LK2(RA, RB, RD, RC, RE, 30);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) 	S(S6, RA, RB, RD, RC, RE);		LK2(RD, RE, RB, RC, RA, 31);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) 	S(S7, RD, RE, RB, RC, RA);		 K2(RA, RB, RC, RD, RE, 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) 
	/* results are back in RA..RD; undo the initial transpose */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) 	write_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) 	write_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) 	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) SYM_FUNC_END(__serpent_enc_blk16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) 
/*
 * __serpent_dec_blk16: file-local decryption core shared by the ECB, CBC and
 * XTS decryption entry points below.
 *
 * Mirror image of the encryption core: transpose, K2() with key index 32,
 * then inverse S-boxes SI7..SI0 (four passes) for key indexes 31 down to 1,
 * each as SP() -- which also fetches that round's key words -- followed by
 * KL2(); the last step is a plain S(SI0) followed by K2() with key index 0.
 *
 * Unlike the encryption core, the result is NOT left in the input registers:
 * callers must store from RC, RD, RB, RE (sets 1 and 2), in that order.
 *
 * Clobbers RA1, RA2, RK0..RK3 and RNOT.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) .align 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) SYM_FUNC_START_LOCAL(__serpent_dec_blk16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) 	/* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) 	 *	%rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) 	 *	RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2: ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) 	 * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) 	 *	RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2: plaintext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) 
	/* comparing a register with itself sets every bit: RNOT = all-ones */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) 	vpcmpeqd RNOT, RNOT, RNOT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) 
	/* RK0..RK2 are free here, so they serve as transpose temporaries */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) 	read_blocks(RA1, RB1, RC1, RD1, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) 	read_blocks(RA2, RB2, RC2, RD2, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) 
	/* left column: inverse S-box + key fetch; right column: key/mixing step */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) 						 K2(RA, RB, RC, RD, RE, 32);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) 	SP(SI7, RA, RB, RC, RD, RE, 31);	KL2(RB, RD, RA, RE, RC, 31);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) 	SP(SI6, RB, RD, RA, RE, RC, 30);	KL2(RA, RC, RE, RB, RD, 30);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) 	SP(SI5, RA, RC, RE, RB, RD, 29);	KL2(RC, RD, RA, RE, RB, 29);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) 	SP(SI4, RC, RD, RA, RE, RB, 28);	KL2(RC, RA, RB, RE, RD, 28);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) 	SP(SI3, RC, RA, RB, RE, RD, 27);	KL2(RB, RC, RD, RE, RA, 27);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) 	SP(SI2, RB, RC, RD, RE, RA, 26);	KL2(RC, RA, RE, RD, RB, 26);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) 	SP(SI1, RC, RA, RE, RD, RB, 25);	KL2(RB, RA, RE, RD, RC, 25);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) 	SP(SI0, RB, RA, RE, RD, RC, 24);	KL2(RE, RC, RA, RB, RD, 24);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) 	SP(SI7, RE, RC, RA, RB, RD, 23);	KL2(RC, RB, RE, RD, RA, 23);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) 	SP(SI6, RC, RB, RE, RD, RA, 22);	KL2(RE, RA, RD, RC, RB, 22);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) 	SP(SI5, RE, RA, RD, RC, RB, 21);	KL2(RA, RB, RE, RD, RC, 21);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) 	SP(SI4, RA, RB, RE, RD, RC, 20);	KL2(RA, RE, RC, RD, RB, 20);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) 	SP(SI3, RA, RE, RC, RD, RB, 19);	KL2(RC, RA, RB, RD, RE, 19);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) 	SP(SI2, RC, RA, RB, RD, RE, 18);	KL2(RA, RE, RD, RB, RC, 18);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) 	SP(SI1, RA, RE, RD, RB, RC, 17);	KL2(RC, RE, RD, RB, RA, 17);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) 	SP(SI0, RC, RE, RD, RB, RA, 16);	KL2(RD, RA, RE, RC, RB, 16);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) 	SP(SI7, RD, RA, RE, RC, RB, 15);	KL2(RA, RC, RD, RB, RE, 15);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) 	SP(SI6, RA, RC, RD, RB, RE, 14);	KL2(RD, RE, RB, RA, RC, 14);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) 	SP(SI5, RD, RE, RB, RA, RC, 13);	KL2(RE, RC, RD, RB, RA, 13);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) 	SP(SI4, RE, RC, RD, RB, RA, 12);	KL2(RE, RD, RA, RB, RC, 12);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) 	SP(SI3, RE, RD, RA, RB, RC, 11);	KL2(RA, RE, RC, RB, RD, 11);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) 	SP(SI2, RA, RE, RC, RB, RD, 10);	KL2(RE, RD, RB, RC, RA, 10);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) 	SP(SI1, RE, RD, RB, RC, RA, 9);		KL2(RA, RD, RB, RC, RE, 9);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) 	SP(SI0, RA, RD, RB, RC, RE, 8);		KL2(RB, RE, RD, RA, RC, 8);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) 	SP(SI7, RB, RE, RD, RA, RC, 7);		KL2(RE, RA, RB, RC, RD, 7);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) 	SP(SI6, RE, RA, RB, RC, RD, 6);		KL2(RB, RD, RC, RE, RA, 6);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) 	SP(SI5, RB, RD, RC, RE, RA, 5);		KL2(RD, RA, RB, RC, RE, 5);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) 	SP(SI4, RD, RA, RB, RC, RE, 4);		KL2(RD, RB, RE, RC, RA, 4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) 	SP(SI3, RD, RB, RE, RC, RA, 3);		KL2(RE, RD, RA, RC, RB, 3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) 	SP(SI2, RE, RD, RA, RC, RB, 2);		KL2(RD, RB, RC, RA, RE, 2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) 	SP(SI1, RD, RB, RC, RA, RE, 1);		KL2(RE, RB, RC, RA, RD, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) 	S(SI0, RE, RB, RC, RA, RD);		 K2(RC, RD, RB, RE, RA, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) 
	/* results live in RC, RD, RB, RE; transpose them back in that order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) 	write_blocks(RC1, RD1, RB1, RE1, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) 	write_blocks(RC2, RD2, RB2, RE2, RK0, RK1, RK2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) 	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) SYM_FUNC_END(__serpent_dec_blk16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) 
/*
 * serpent_ecb_enc_16way: exported entry point; runs the encryption core
 * __serpent_enc_blk16 on data read from src (%rdx) and writes the result to
 * dst (%rsi).  %rdi (ctx) is left untouched for the core to use.
 *
 * NOTE(review): load_16way()/store_16way() come from
 * glue_helper-asm-avx2.S; presumably they move 16 consecutive 16-byte
 * blocks -- confirm there.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) SYM_FUNC_START(serpent_ecb_enc_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) 	/* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) 	 *	%rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674) 	 *	%rsi: dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) 	 *	%rdx: src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) 
	/* clear upper YMM halves to avoid AVX/SSE transition penalties */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) 	load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) 	call __serpent_enc_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) 
	/* the encryption core returns its result in the input registers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) 	store_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) 	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) SYM_FUNC_END(serpent_ecb_enc_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) 
/*
 * serpent_ecb_dec_16way: exported entry point; runs the decryption core
 * __serpent_dec_blk16 on data read from src (%rdx) and writes the result to
 * dst (%rsi).  %rdi (ctx) is left untouched for the core to use.
 *
 * NOTE(review): load_16way()/store_16way() come from
 * glue_helper-asm-avx2.S; presumably they move 16 consecutive 16-byte
 * blocks -- confirm there.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) SYM_FUNC_START(serpent_ecb_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694) 	/* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) 	 *	%rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) 	 *	%rsi: dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697) 	 *	%rdx: src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) 
	/* clear upper YMM halves to avoid AVX/SSE transition penalties */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) 	load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) 	call __serpent_dec_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) 
	/* the decryption core leaves its result in RC, RD, RB, RE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) 	store_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) 	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) SYM_FUNC_END(serpent_ecb_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) 
/*
 * serpent_cbc_dec_16way: exported entry point for CBC decryption; runs the
 * decryption core __serpent_dec_blk16 on data read from src (%rdx) and
 * hands the result, together with src and dst, to store_cbc_16way().
 *
 * NOTE(review): store_cbc_16way() comes from glue_helper-asm-avx2.S; it
 * receives src as well as dst, presumably to XOR each decrypted block with
 * the preceding ciphertext block, with RK0 as a temporary.  No IV is passed
 * in here, so the first block's chaining is presumably done by the caller --
 * confirm both points.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) SYM_FUNC_START(serpent_cbc_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) 	/* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) 	 *	%rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) 	 *	%rsi: dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) 	 *	%rdx: src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) 
	/* clear upper YMM halves to avoid AVX/SSE transition penalties */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) 	load_16way(%rdx, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) 	call __serpent_dec_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) 
	/* the decryption core leaves its result in RC, RD, RB, RE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729) 	store_cbc_16way(%rdx, %rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) 			RK0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) 	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736) SYM_FUNC_END(serpent_cbc_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) 
/*
 * serpent_ctr_16way: exported entry point for CTR mode.  Counter values
 * derived from the IV at %rcx are placed in the data registers, run through
 * the ENCRYPTION core, and the result is combined with src (%rdx) into
 * dst (%rsi) by store_ctr_16way().
 *
 * NOTE(review): load_ctr_16way()/store_ctr_16way() come from
 * glue_helper-asm-avx2.S.  Presumably the load helper generates 16
 * successive counters (using .Lbswap128_mask for the byte order and the RK
 * registers, RNOT and tp as temporaries) and the store helper XORs the
 * keystream with src -- confirm there, including whether the IV in memory is
 * advanced.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) SYM_FUNC_START(serpent_ctr_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) 	/* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) 	 *	%rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) 	 *	%rsi: dst (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) 	 *	%rdx: src (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743) 	 *	%rcx: iv (little endian, 128bit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) 
	/* clear upper YMM halves to avoid AVX/SSE transition penalties */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) 	load_ctr_16way(%rcx, .Lbswap128_mask, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) 		       RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) 		       tp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) 
	/* CTR uses the encryption core for both directions */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) 	call __serpent_enc_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) 	store_ctr_16way(%rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) 	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) SYM_FUNC_END(serpent_ctr_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) 
/*
 * serpent_xts_enc_16way: exported entry point for XTS encryption; prepares
 * the data registers with load_xts_16way(), runs the encryption core
 * __serpent_enc_blk16 and finishes with store_xts_16way() into dst (%rsi).
 *
 * NOTE(review): both helpers come from glue_helper-asm-avx2.S.  The load
 * helper is given iv, src AND dst while the store helper only gets dst, so
 * the per-block tweaks are presumably parked in dst between the two calls
 * and XORed back in by the store helper -- confirm there.
 * NOTE(review): XTS tweaks are normally written t ⊗ αⁿ (a multiplication in
 * GF(2¹²⁸)); the ⊕ in the parameter comment below is the original wording --
 * confirm which is intended.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) SYM_FUNC_START(serpent_xts_enc_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) 	/* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) 	 *	%rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) 	 *	%rsi: dst (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) 	 *	%rdx: src (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) 	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) 
	/* clear upper YMM halves to avoid AVX/SSE transition penalties */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) 	load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) 		       RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) 		       .Lxts_gf128mul_and_shl1_mask_0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) 		       .Lxts_gf128mul_and_shl1_mask_1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) 	call __serpent_enc_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) 
	/* the encryption core returns its result in the input registers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) 	store_xts_16way(%rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2, RD2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) 	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) SYM_FUNC_END(serpent_xts_enc_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) 
/*
 * serpent_xts_dec_16way: exported entry point for XTS decryption; prepares
 * the data registers with load_xts_16way(), runs the decryption core
 * __serpent_dec_blk16 and finishes with store_xts_16way() into dst (%rsi).
 *
 * NOTE(review): both helpers come from glue_helper-asm-avx2.S.  The load
 * helper is given iv, src AND dst while the store helper only gets dst, so
 * the per-block tweaks are presumably parked in dst between the two calls
 * and XORed back in by the store helper -- confirm there.
 * NOTE(review): XTS tweaks are normally written t ⊗ αⁿ (a multiplication in
 * GF(2¹²⁸)); the ⊕ in the parameter comment below is the original wording --
 * confirm which is intended.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) SYM_FUNC_START(serpent_xts_dec_16way)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) 	/* input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) 	 *	%rdi: ctx, CTX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) 	 *	%rsi: dst (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) 	 *	%rdx: src (16 blocks)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) 	 *	%rcx: iv (t ⊕ αⁿ ∈ GF(2¹²⁸))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) 
	/* clear upper YMM halves to avoid AVX/SSE transition penalties */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) 	load_xts_16way(%rcx, %rdx, %rsi, RA1, RB1, RC1, RD1, RA2, RB2, RC2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) 		       RD2, RK0, RK0x, RK1, RK1x, RK2, RK2x, RK3, RK3x, RNOT,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) 		       .Lxts_gf128mul_and_shl1_mask_0,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) 		       .Lxts_gf128mul_and_shl1_mask_1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) 	call __serpent_dec_blk16;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) 
	/* the decryption core leaves its result in RC, RD, RB, RE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) 	store_xts_16way(%rsi, RC1, RD1, RB1, RE1, RC2, RD2, RB2, RE2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) 	vzeroupper;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) 	ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) SYM_FUNC_END(serpent_xts_dec_16way)