Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    3)  * Implement AES algorithm in Intel AES-NI instructions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    4)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    5)  * The white paper of AES-NI instructions can be downloaded from:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    6)  *   http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    7)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    8)  * Copyright (C) 2008, Intel Corp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300    9)  *    Author: Huang Ying <ying.huang@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   10)  *            Vinodh Gopal <vinodh.gopal@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   11)  *            Kahraman Akdemir
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   12)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   13)  * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   14)  * interface for 64-bit kernels.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   15)  *    Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   16)  *             Aidan O'Mahony (aidan.o.mahony@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   17)  *             Adrian Hoban <adrian.hoban@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   18)  *             James Guilford (james.guilford@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   19)  *             Gabriele Paoloni <gabriele.paoloni@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   20)  *             Tadeusz Struk (tadeusz.struk@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   21)  *             Wajdi Feghali (wajdi.k.feghali@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   22)  *    Copyright (c) 2010, Intel Corporation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   23)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   24)  * Ported x86_64 version to x86:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   25)  *    Author: Mathias Krause <minipli@googlemail.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   26)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   27) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   28) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   29) #include <asm/frame.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   30) #include <asm/nospec-branch.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   31) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   32) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   33)  * The following macros are used to move an (un)aligned 16 byte value to/from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   34)  * an XMM register.  This can be done for either FP or integer values: for FP use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   35)  * movaps (move aligned packed single), for integer use movdqa (move double quad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   36)  * aligned).  It doesn't make a performance difference which instruction is used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   37)  * since Nehalem (original Core i7) was released.  However, movaps is a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   38)  * shorter, so that is the one we'll use for now (same for the unaligned form).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   39)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   40) #define MOVADQ	movaps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   41) #define MOVUDQ	movups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   42) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   43) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   44) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   45) # constants in mergeable sections (one 16-byte entry each), linker can reorder and merge
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   46) .section	.rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   47) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   48) .Lgf128mul_x_ble_mask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   49) 	.octa 0x00000000000000010000000000000087
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   50) .section	.rodata.cst16.POLY, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   51) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   52) POLY:   .octa 0xC2000000000000000000000000000001	# XORed into the doubled hash key by PRECOMPUTE when a bit is shifted out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   53) .section	.rodata.cst16.TWOONE, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   54) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   55) TWOONE: .octa 0x00000001000000000000000000000001	# pcmpeqd pattern used by PRECOMPUTE (dwords 0 and 3 equal 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   56) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   57) .section	.rodata.cst16.SHUF_MASK, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   58) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   59) SHUF_MASK:  .octa 0x000102030405060708090A0B0C0D0E0F	# pshufb control that reverses the order of the 16 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   60) .section	.rodata.cst16.MASK1, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   61) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   62) MASK1:      .octa 0x0000000000000000ffffffffffffffff	# low 64 bits set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   63) .section	.rodata.cst16.MASK2, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   64) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   65) MASK2:      .octa 0xffffffffffffffff0000000000000000	# high 64 bits set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   66) .section	.rodata.cst16.ONE, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   67) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   68) ONE:        .octa 0x00000000000000000000000000000001	# added with paddd to step the counter block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   69) .section	.rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   70) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   71) F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0	# NOTE(review): only 31 hex digits, so the top nibble is 0 - confirm this is intended
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   72) .section	.rodata.cst16.dec, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   73) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   74) dec:        .octa 0x1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   75) .section	.rodata.cst16.enc, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   76) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   77) enc:        .octa 0x2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   78) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   79) # order of these constants should not change.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   80) # more specifically, ALL_F should follow SHIFT_MASK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   81) # and zero should follow ALL_F (GCM_ENC_DEC does 16-byte loads that straddle two neighbouring entries)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   82) .section	.rodata, "a", @progbits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   83) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   84) SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100	# read at SHIFT_MASK+16-r13, so the load runs on into ALL_F
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   85) ALL_F:      .octa 0xffffffffffffffffffffffffffffffff	# read at ALL_F+16-r13: r13 bytes of 0xff, then zeros from the next entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   86)             .octa 0x00000000000000000000000000000000	# the zero entry that must follow ALL_F
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   87) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   88) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   89) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   90) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   91) #define	STACK_OFFSET    8*3	// bytes pushed by FUNC_SAVE (three 8-byte registers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   92) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   93) #define AadHash 16*0	// byte offsets below are applied to the context pointer in %arg2; this one holds the hash value kept in %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   94) #define AadLen 16*1	// ctx_data.aad_length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   95) #define InLen (16*1)+8	// ctx_data.in_length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   96) #define PBlockEncKey 16*2	// ctx_data.partial_block_enc_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   97) #define OrigIV 16*3	// ctx_data.orig_IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   98) #define CurCount 16*4	// ctx_data.current_counter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   99) #define PBlockLen 16*5	// ctx_data.partial_block_length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  100) #define	HashKey		16*6	// store HashKey <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  101) #define	HashKey_2	16*7	// store HashKey^2 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  102) #define	HashKey_3	16*8	// store HashKey^3 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  103) #define	HashKey_4	16*9	// store HashKey^4 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  104) #define	HashKey_k	16*10	// store XOR of High 64 bits and Low 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  105) 				// bits of  HashKey <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  106) 				//(for Karatsuba purposes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  107) #define	HashKey_2_k	16*11	// store XOR of High 64 bits and Low 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  108) 				// bits of  HashKey^2 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  109) 				// (for Karatsuba purposes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  110) #define	HashKey_3_k	16*12	// store XOR of High 64 bits and Low 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  111) 				// bits of  HashKey^3 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  112) 				// (for Karatsuba purposes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  113) #define	HashKey_4_k	16*13	// store XOR of High 64 bits and Low 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  114) 				// bits of  HashKey^4 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  115) 				// (for Karatsuba purposes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  117) #define arg1 rdi	// arg1-arg6: register names without the % sign (written as %arg1 etc.)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  118) #define arg2 rsi	// context data pointer in the GCM_* macros
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  119) #define arg3 rdx	// output pointer in GCM_ENC_DEC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  120) #define arg4 rcx	// input pointer in GCM_ENC_DEC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  121) #define arg5 r8	// input byte count in GCM_ENC_DEC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  122) #define arg6 r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  123) #define arg7 STACK_OFFSET+8(%rsp)	// arg7-arg11: stack slots; the extra 8 is presumably the return address - confirm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  124) #define arg8 STACK_OFFSET+16(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  125) #define arg9 STACK_OFFSET+24(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  126) #define arg10 STACK_OFFSET+32(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  127) #define arg11 STACK_OFFSET+40(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  128) #define keysize 2*15*16(%arg1)	// offset 480 from arg1; presumably the key length stored after two 15*16-byte key schedules - TODO confirm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  129) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  132) #define STATE1	%xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  133) #define STATE2	%xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  134) #define STATE3	%xmm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  135) #define STATE4	%xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  136) #define STATE	STATE1	// alias of STATE1 (%xmm0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  137) #define IN1	%xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  138) #define IN2	%xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  139) #define IN3	%xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  140) #define IN4	%xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  141) #define IN	IN1	// alias of IN1 (%xmm1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  142) #define KEY	%xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  143) #define IV	%xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  145) #define BSWAP_MASK %xmm10	// same register as GF128MUL_MASK below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  146) #define CTR	%xmm11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  147) #define INC	%xmm12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  149) #define GF128MUL_MASK %xmm10	// shares %xmm10 with BSWAP_MASK, so the two cannot be live at once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  151) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  152) #define AREG	%rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  153) #define KEYP	%rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  154) #define OUTP	%rsi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  155) #define UKEYP	OUTP	// alias: same register as OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  156) #define INP	%rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  157) #define LEN	%rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  158) #define IVP	%r8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  159) #define KLEN	%r9d	// 32-bit view of r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  160) #define T1	%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  161) #define TKEYP	T1	// alias: same register as T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  162) #define T2	%r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  163) #define TCTR_LOW T2	// alias: same register as T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  164) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  165) #define AREG	%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  166) #define KEYP	%edi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  167) #define OUTP	AREG	// in this branch OUTP, UKEYP and AREG are all %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  168) #define UKEYP	OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  169) #define INP	%edx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  170) #define LEN	%esi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  171) #define IVP	%ebp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  172) #define KLEN	%ebx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  173) #define T1	%ecx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  174) #define TKEYP	T1	// alias: same register as T1; T2 and TCTR_LOW are not defined in this branch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  175) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  177) .macro FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  178) 	push	%r12	# r12-r14 are popped again, in reverse order, by FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  179) 	push	%r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  180) 	push	%r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  181) # Three 8-byte pushes: this is the 8*3 that STACK_OFFSET adds to the stack argument slots.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  182) # states of %xmm registers %xmm6:%xmm15 not saved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  183) # all %xmm registers are clobbered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  184) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  185) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  187) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  188) .macro FUNC_RESTORE	# undoes FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  189) 	pop	%r14	# pops mirror the push order r12, r13, r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  190) 	pop	%r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  191) 	pop	%r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  192) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  194) # Precompute hashkeys.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  195) # Input: Hash subkey (SUBKEY holds a pointer to 16 bytes).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  196) # Output: HashKeys stored in gcm_context_data (via %arg2).  Only needs to be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  197) # once per key.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  198) # clobbers r12, and tmp xmm registers.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  199) .macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  200) 	mov	\SUBKEY, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  201) 	movdqu	(%r12), \TMP3	# TMP3 = the 16 bytes SUBKEY points to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  202) 	movdqa	SHUF_MASK(%rip), \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  203) 	pshufb	\TMP2, \TMP3	# reverse the byte order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  205) 	# precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  206) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  207) 	movdqa	\TMP3, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  208) 	psllq	$1, \TMP3	# shift each 64-bit half left by one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  209) 	psrlq	$63, \TMP2	# TMP2 = the bit shifted out of each half
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  210) 	movdqa	\TMP2, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  211) 	pslldq	$8, \TMP2	# carry from the low half moves to bit 0 of the high half
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  212) 	psrldq	$8, \TMP1	# TMP1 low qword = bit shifted out of the high half
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  213) 	por	\TMP2, \TMP3	# TMP3 = full 128-bit value shifted left by one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  214) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  215) 	# reduce HashKey<<1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  217) 	pshufd	$0x24, \TMP1, \TMP2	# copy the shifted-out bit into dwords 0 and 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  218) 	pcmpeqd TWOONE(%rip), \TMP2	# dwords 0 and 3 become all-ones iff that bit is 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  219) 	pand	POLY(%rip), \TMP2	# TMP2 = POLY if the bit was set, else 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  220) 	pxor	\TMP2, \TMP3	# conditional XOR with POLY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  221) 	movdqu	\TMP3, HashKey(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  223) 	movdqa	   \TMP3, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  224) 	pshufd	   $78, \TMP3, \TMP1	# swap the two 64-bit halves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  225) 	pxor	   \TMP3, \TMP1	# TMP1 = high half XOR low half (in both halves)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  226) 	movdqu	   \TMP1, HashKey_k(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  227) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  228) 	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  229) # TMP5 = HashKey^2<<1 (mod poly)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  230) 	movdqu	   \TMP5, HashKey_2(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  231) # HashKey_2 = HashKey^2<<1 (mod poly)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  232) 	pshufd	   $78, \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  233) 	pxor	   \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  234) 	movdqu	   \TMP1, HashKey_2_k(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  236) 	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  237) # TMP5 = HashKey^3<<1 (mod poly)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  238) 	movdqu	   \TMP5, HashKey_3(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  239) 	pshufd	   $78, \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  240) 	pxor	   \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  241) 	movdqu	   \TMP1, HashKey_3_k(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  243) 	GHASH_MUL  \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  244) # TMP5 = HashKey^4<<1 (mod poly)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  245) 	movdqu	   \TMP5, HashKey_4(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  246) 	pshufd	   $78, \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  247) 	pxor	   \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  248) 	movdqu	   \TMP1, HashKey_4_k(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  249) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  250) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  251) # GCM_INIT initializes a gcm_context struct (pointed to by %arg2) to prepare for encoding/decoding.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  252) # Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13 (%xmm7 is also handed to PRECOMPUTE as a temporary)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  253) .macro GCM_INIT Iv SUBKEY AAD AADLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  254) 	mov \AADLEN, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  255) 	mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  256) 	xor %r11d, %r11d	# r11 = 0 (a 32-bit write also clears the upper half)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  257) 	mov %r11, InLen(%arg2) # ctx_data.in_length = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  258) 	mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  259) 	mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0 (this 8-byte store clears only the low half of the 16-byte slot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  260) 	mov \Iv, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  261) 	movdqu (%rax), %xmm0	# xmm0 = the 16 bytes Iv points to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  262) 	movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  263) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  264) 	movdqa  SHUF_MASK(%rip), %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  265) 	pshufb %xmm2, %xmm0	# reverse the byte order of the iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  266) 	movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = byte-reversed iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  268) 	PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  269) 	movdqu HashKey(%arg2), %xmm13	# reload the value PRECOMPUTE stored at HashKey
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  270) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  271) 	CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  272) 	%xmm4, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  273) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  274) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  275) # GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  276) # struct has been initialized by GCM_INIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  277) # Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  278) # Clobbers rax, r10-r13, and xmm0-xmm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  279) .macro GCM_ENC_DEC operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  280) 	movdqu AadHash(%arg2), %xmm8	# xmm8 = hash value saved in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  281) 	movdqu HashKey(%arg2), %xmm13	# xmm13 = value stored at HashKey
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  282) 	add %arg5, InLen(%arg2)	# ctx_data.in_length += byte count of this call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  283) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  284) 	xor %r11d, %r11d # initialise the data pointer offset as zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  285) 	PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  286) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  287) 	sub %r11, %arg5		# sub partial block data used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  288) 	mov %arg5, %r13		# save the number of bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  290) 	and $-16, %r13		# %r13 = %r13 - (%r13 mod 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  291) 	mov %r13, %r12	# r12 = copy used to pick the number of initial blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  292) 	# Encrypt/Decrypt first few blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  293) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  294) 	and	$(3<<4), %r12	# r12 = 16 * (number of whole blocks mod 4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  295) 	jz	_initial_num_blocks_is_0_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  296) 	cmp	$(2<<4), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  297) 	jb	_initial_num_blocks_is_1_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  298) 	je	_initial_num_blocks_is_2_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  299) _initial_num_blocks_is_3_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  300) 	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  301) %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  302) 	sub	$48, %r13	# three blocks accounted for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  303) 	jmp	_initial_blocks_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  304) _initial_num_blocks_is_2_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  305) 	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  306) %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  307) 	sub	$32, %r13	# two blocks accounted for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  308) 	jmp	_initial_blocks_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  309) _initial_num_blocks_is_1_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  310) 	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  311) %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  312) 	sub	$16, %r13	# one block accounted for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  313) 	jmp	_initial_blocks_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  314) _initial_num_blocks_is_0_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  315) 	INITIAL_BLOCKS_ENC_DEC	%xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  316) %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  317) _initial_blocks_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  318) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  319) 	# Main loop - Encrypt/Decrypt remaining blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  321) 	test	%r13, %r13	# r13 is now a multiple of 64 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  322) 	je	_zero_cipher_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  323) 	sub	$64, %r13	# zero here means exactly one group of 4 blocks is left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  324) 	je	_four_cipher_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  325) _crypt_by_4_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  326) 	GHASH_4_ENCRYPT_4_PARALLEL_\operation	%xmm9, %xmm10, %xmm11, %xmm12, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  327) 	%xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  328) 	%xmm7, %xmm8, enc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  329) 	add	$64, %r11	# advance the data offset by 4 blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  330) 	sub	$64, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  331) 	jne	_crypt_by_4_\@	# loop until the 64-byte groups are used up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  332) _four_cipher_left_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  333) 	GHASH_LAST_4	%xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  334) %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  335) _zero_cipher_left_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  336) 	movdqu %xmm8, AadHash(%arg2)	# write the hash value back to the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  337) 	movdqu %xmm0, CurCount(%arg2)	# write xmm0 back as ctx_data.current_counter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  338) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  339) 	mov	%arg5, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  340) 	and	$15, %r13			# %r13 = arg5 (mod 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  341) 	je	_multiple_of_16_bytes_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  342) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  343) 	mov %r13, PBlockLen(%arg2)	# ctx_data.partial_block_length = r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  345) 	# Handle the last <16 Byte block separately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  346) 	paddd ONE(%rip), %xmm0                # INCR CNT to get Yn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  347) 	movdqu %xmm0, CurCount(%arg2)	# store the incremented counter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  348) 	movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  349) 	pshufb %xmm10, %xmm0	# reverse the byte order before encrypting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  350) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  351) 	ENCRYPT_SINGLE_BLOCK	%xmm0, %xmm1        # Encrypt(K, Yn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  352) 	movdqu %xmm0, PBlockEncKey(%arg2)	# ctx_data.partial_block_enc_key = xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  354) 	cmp	$16, %arg5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  355) 	jge _large_enough_update_\@	# NOTE(review): signed compare on a byte count - confirm lengths stay below 2^63
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  356) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  357) 	lea (%arg4,%r11,1), %r10	# r10 = input pointer + data offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  358) 	mov %r13, %r12	# r12 = number of bytes to read
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  359) 	READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  360) 	jmp _data_read_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  361) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  362) _large_enough_update_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  363) 	sub	$16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  364) 	add	%r13, %r11	# r11 = offset of the 16 bytes that end at the end of the input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  365) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  366) 	# receive the last <16 Byte block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  367) 	movdqu	(%arg4, %r11, 1), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  369) 	sub	%r13, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  370) 	add	$16, %r11	# r11 is back at the start of the partial block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  371) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  372) 	lea	SHIFT_MASK+16(%rip), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  373) 	# adjust the shuffle mask pointer to be able to shift 16-r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  374) 	# (r13 is the number of bytes in plaintext mod 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  375) 	sub	%r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  376) 	# get the appropriate shuffle mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  377) 	movdqu	(%r12), %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  378) 	# shift right 16-r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  379) 	pshufb  %xmm2, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  380) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  381) _data_read_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  382) 	lea ALL_F+16(%rip), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  383) 	sub %r13, %r12	# r12 points at a mask of r13 bytes of 0xff followed by zeros
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  384) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  385) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  386) 	movdqa  %xmm1, %xmm2	# keep a copy of the input block for hashing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  387) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  388) 	pxor	%xmm1, %xmm0            # XOR Encrypt(K, Yn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  389) 	movdqu	(%r12), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  390) 	# get the appropriate mask to mask out top 16-r13 bytes of xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  391) 	pand	%xmm1, %xmm0            # mask out top 16-r13 bytes of xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  392) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  393) 	pand    %xmm1, %xmm2	# mask the saved input block the same way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  394) 	movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  395) 	pshufb %xmm10 ,%xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  396) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  397) 	pxor %xmm2, %xmm8	# fold the masked input block into the hash value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  398) .else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  399) 	movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  400) 	pshufb %xmm10,%xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  401) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  402) 	pxor	%xmm0, %xmm8	# fold the masked output block into the hash value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  403) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  404) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  405) 	movdqu %xmm8, AadHash(%arg2)	# write the hash value back to the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  406) .ifc \operation, enc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  407) 	# GHASH computation for the last <16 byte block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  408) 	movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  409) 	# shuffle xmm0 back to output as ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  410) 	pshufb %xmm10, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  411) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  413) 	# Output %r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  414) 	movq %xmm0, %rax	# rax = low 8 bytes of the result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  415) 	cmp $8, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  416) 	jle _less_than_8_bytes_left_\@	# 8 or fewer bytes: write them one at a time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  417) 	mov %rax, (%arg3 , %r11, 1)	# store 8 bytes at output pointer + data offset
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  418) 	add $8, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  419) 	psrldq $8, %xmm0	# bring the upper 8 bytes down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  420) 	movq %xmm0, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  421) 	sub $8, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  422) _less_than_8_bytes_left_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  423) 	mov %al,  (%arg3, %r11, 1)	# store one byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  424) 	add $1, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  425) 	shr $8, %rax	# next byte into al
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  426) 	sub $1, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  427) 	jne _less_than_8_bytes_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  428) _multiple_of_16_bytes_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  429) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  430) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  431) # GCM_COMPLETE: finishes the GHASH of a pending partial block, folds in len(A)||len(C) and XORs the result with E(K, Y0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  432) # Output: Authentication Tag, written to the address given by AUTHTAG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  433) # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  434) .macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  435) 	movdqu AadHash(%arg2), %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  436) 	movdqu HashKey(%arg2), %xmm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  437) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  438) 	mov PBlockLen(%arg2), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  440) 	test %r12, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  441) 	je _partial_done\@	# PBlockLen == 0: no pending partial block to hash
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  443) 	GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  444) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  445) _partial_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  446) 	mov AadLen(%arg2), %r12  # %r12 = aadLen (number of bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  447) 	shl	$3, %r12		  # convert into number of bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  448) 	movd	%r12d, %xmm15		  # len(A) in %xmm15 (low 32 bits of %r12 only)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  449) 	mov InLen(%arg2), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  450) 	shl     $3, %r12                  # len(C) in bits (bytes * 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  451) 	movq    %r12, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  452) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  453) 	pslldq	$8, %xmm15		  # %xmm15 = len(A)||0x0000000000000000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  454) 	pxor	%xmm1, %xmm15		  # %xmm15 = len(A)||len(C)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  455) 	pxor	%xmm15, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  456) 	GHASH_MUL	%xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  457) 	# final GHASH value is now in %xmm8; byte-swap it with SHUF_MASK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  458) 	movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  459) 	pshufb %xmm10, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  460) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  461) 	movdqu OrigIV(%arg2), %xmm0       # %xmm0 = Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  462) 	ENCRYPT_SINGLE_BLOCK	%xmm0,  %xmm1	  # E(K, Y0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  463) 	pxor	%xmm8, %xmm0		# %xmm0 = GHASH ^ E(K, Y0), the tag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  464) _return_T_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  465) 	mov	\AUTHTAG, %r10                     # %r10 = authTag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  466) 	mov	\AUTHTAGLEN, %r11                    # %r11 = auth_tag_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  467) 	cmp	$16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  468) 	je	_T_16_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  469) 	cmp	$8, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  470) 	jl	_T_4_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  471) _T_8_\@:		# auth_tag_len >= 8 and != 16: store the low 8 bytes first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  472) 	movq	%xmm0, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  473) 	mov	%rax, (%r10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  474) 	add	$8, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  475) 	sub	$8, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  476) 	psrldq	$8, %xmm0		# drop the 8 bytes just written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  477) 	test	%r11, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  478) 	je	_return_T_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  479) _T_4_\@:		# store the next 4 bytes of the tag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  480) 	movd	%xmm0, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  481) 	mov	%eax, (%r10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  482) 	add	$4, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  483) 	sub	$4, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  484) 	psrldq	$4, %xmm0		# drop the 4 bytes just written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  485) 	test	%r11, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  486) 	je	_return_T_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  487) _T_123_\@:		# remaining tail: 2 bytes, then 1 byte, as needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  488) 	movd	%xmm0, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  489) 	cmp	$2, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  490) 	jl	_T_1_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  491) 	mov	%ax, (%r10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  492) 	cmp	$2, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  493) 	je	_return_T_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  494) 	add	$2, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  495) 	sar	$16, %eax		# bring the third byte down into %al
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  496) _T_1_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  497) 	mov	%al, (%r10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  498) 	jmp	_return_T_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  499) _T_16_\@:		# full 16-byte tag: one unaligned store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  500) 	movdqu	%xmm0, (%r10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  501) _return_T_done_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  502) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  504) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  505) /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  506) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  507) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  508) * Input: A and B (128-bits each, bit-reflected)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  509) * Output: C = A*B*x mod poly, (i.e. >>1 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  510) * To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  511) * GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  512) * The result is left in GH; HK is only read; TMP1-TMP5 are clobbered.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  513) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  514) .macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  515) 	movdqa	  \GH, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  516) 	pshufd	  $78, \GH, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  517) 	pshufd	  $78, \HK, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  518) 	pxor	  \GH, \TMP2            # TMP2 = a1+a0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  519) 	pxor	  \HK, \TMP3            # TMP3 = b1+b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  520) 	pclmulqdq $0x11, \HK, \TMP1     # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  521) 	pclmulqdq $0x00, \HK, \GH       # GH = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  522) 	pclmulqdq $0x00, \TMP3, \TMP2   # TMP2 = (a0+a1)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  523) 	pxor	  \GH, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  524) 	pxor	  \TMP1, \TMP2          # TMP2 = (a0*b1)+(a1*b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  525) 	movdqa	  \TMP2, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  526) 	pslldq	  $8, \TMP3             # left shift TMP3 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  527) 	psrldq	  $8, \TMP2             # right shift TMP2 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  528) 	pxor	  \TMP3, \GH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  529) 	pxor	  \TMP2, \TMP1          # TMP1:GH holds the result of GH*HK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  530) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  531)         # first phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  532) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  533) 	movdqa    \GH, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  534) 	movdqa    \GH, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  535) 	movdqa    \GH, \TMP4            # copy GH into TMP2,TMP3 and TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  536) 					# in order to perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  537) 					# independent shifts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  538) 	pslld     $31, \TMP2            # packed left shift <<31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  539) 	pslld     $30, \TMP3            # packed left shift <<30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  540) 	pslld     $25, \TMP4            # packed left shift <<25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  541) 	pxor      \TMP3, \TMP2          # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  542) 	pxor      \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  543) 	movdqa    \TMP2, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  544) 	psrldq    $4, \TMP5             # right shift TMP5 1 DW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  545) 	pslldq    $12, \TMP2            # left shift TMP2 3 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  546) 	pxor      \TMP2, \GH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  547) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  548)         # second phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  549) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  550) 	movdqa    \GH,\TMP2             # copy GH into TMP2,TMP3 and TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  551) 					# in order to perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  552) 					# independent shifts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  553) 	movdqa    \GH,\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  554) 	movdqa    \GH,\TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  555) 	psrld     $1,\TMP2              # packed right shift >>1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  556) 	psrld     $2,\TMP3              # packed right shift >>2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  557) 	psrld     $7,\TMP4              # packed right shift >>7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  558) 	pxor      \TMP3,\TMP2		# xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  559) 	pxor      \TMP4,\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  560) 	pxor      \TMP5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  561) 	pxor      \TMP2, \GH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  562) 	pxor      \TMP1, \GH            # result is in GH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  563) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  564) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  565) # Reads DLEN bytes starting at DPTR and stores them in XMMDst (unread upper bytes are zero),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  566) # where 0 < DLEN < 16; no byte at or beyond DPTR+DLEN is accessed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  567) # Clobbers %rax, DLEN and XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  568) .macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  569)         cmp $8, \DLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  570)         jl _read_lt8_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  571)         mov (\DPTR), %rax	# DLEN >= 8: load the first 8 bytes in one go
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  572)         movq %rax, \XMMDst	# movq zeroes the upper 64 bits of XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  573)         sub $8, \DLEN		# DLEN = number of tail bytes beyond the first 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  574)         jz _done_read_partial_block_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  575) 	xor %eax, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  576) _read_next_byte_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  577)         shl $8, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  578)         mov 7(\DPTR, \DLEN, 1), %al	# tail bytes are read last-to-first into %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  579)         dec \DLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  580)         jnz _read_next_byte_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  581)         movq %rax, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  582) 	pslldq $8, \XMM1		# move the tail bytes to the upper 64 bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  583)         por \XMM1, \XMMDst	# merge the tail with the first 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  584) 	jmp _done_read_partial_block_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  585) _read_lt8_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  586) 	xor %eax, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  587) _read_next_byte_lt8_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  588)         shl $8, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  589)         mov -1(\DPTR, \DLEN, 1), %al	# DLEN < 8: read bytes last-to-first into %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  590)         dec \DLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  591)         jnz _read_next_byte_lt8_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  592)         movq %rax, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  593) _done_read_partial_block_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  594) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  595) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  596) # CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted and stores it in AadHash(%arg2).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  597) # clobbers rax, r10-11, xmm14 and the registers passed as TMP1-TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  598) .macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  599) 	TMP6 TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  600) 	MOVADQ	   SHUF_MASK(%rip), %xmm14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  601) 	mov	   \AAD, %r10		# %r10 = AAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  602) 	mov	   \AADLEN, %r11		# %r11 = aadLen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  603) 	pxor	   \TMP7, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  604) 	pxor	   \TMP6, \TMP6		# TMP6 = running hash, starts at zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  605) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  606) 	cmp	   $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  607) 	jl	   _get_AAD_rest\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  608) _get_AAD_blocks\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  609) 	movdqu	   (%r10), \TMP7		# load the next full 16-byte AAD block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  610) 	pshufb	   %xmm14, \TMP7 # byte-reflect the AAD data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  611) 	pxor	   \TMP7, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  612) 	GHASH_MUL  \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  613) 	add	   $16, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  614) 	sub	   $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  615) 	cmp	   $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  616) 	jge	   _get_AAD_blocks\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  617) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  618) 	movdqu	   \TMP6, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  619) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  620) 	/* read the last <16B of AAD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  621) _get_AAD_rest\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  622) 	test	   %r11, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  623) 	je	   _get_AAD_done\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  624) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  625) 	READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  626) 	pshufb	   %xmm14, \TMP7 # byte-reflect the AAD data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  627) 	pxor	   \TMP6, \TMP7		# fold the running hash into the last block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  628) 	GHASH_MUL  \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  629) 	movdqu \TMP7, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  630) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  631) _get_AAD_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  632) 	movdqu \TMP6, AadHash(%arg2)	# save the AAD hash in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  633) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  634) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  635) # PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  636) # between update calls. Does nothing when PBlockLen(%arg2) is zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  637) # Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  638) # Outputs encrypted bytes (advancing DATA_OFFSET), and updates hash and partial info in gcm_data_context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  639) # Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  640) .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  641) 	AAD_HASH operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 	mov 	PBlockLen(%arg2), %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) 	test	%r13, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644) 	je	_partial_block_done_\@	# Leave Macro if no partial blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) 	# Read in input data without over reading
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) 	cmp	$16, \PLAIN_CYPH_LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	jl	_fewer_than_16_bytes_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 	movups	(\PLAIN_CYPH_IN), %xmm1	# If at least 16 bytes, just fill xmm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) 	jmp	_data_read_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) _fewer_than_16_bytes_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) 	lea	(\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 	mov	\PLAIN_CYPH_LEN, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 	mov PBlockLen(%arg2), %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) _data_read_\@:				# Finished reading in data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 	movdqu	PBlockEncKey(%arg2), %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) 	movdqu	HashKey(%arg2), %xmm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) 	lea	SHIFT_MASK(%rip), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	# adjust the shuffle mask pointer to be able to shift r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 	# here r13 = PBlockLen, as loaded from the context above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 	add	%r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 	movdqu	(%r12), %xmm2		# get the appropriate shuffle mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 	pshufb	%xmm2, %xmm9		# shift right r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 	movdqa	%xmm1, %xmm3		# keep a copy of the ciphertext for GHASH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) 	pxor	%xmm1, %xmm9		# Cyphertext XOR E(K, Yn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) 	mov	\PLAIN_CYPH_LEN, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 	add	%r13, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 	# Set r10 to PBlockLen + PLAIN_CYPH_LEN - 16 (negative if the block is not filled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 	sub	$16, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 	# Determine if partial block is not being filled and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 	# shift mask accordingly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 	jge	_no_extra_mask_1_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 	sub	%r10, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) _no_extra_mask_1_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 	# get the appropriate mask to mask out bottom r13 bytes of xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 	pand	%xmm1, %xmm9		# mask out bottom r13 bytes of xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 	pand	%xmm1, %xmm3		# apply the same mask to the saved ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 	movdqa	SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 	pshufb	%xmm10, %xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 	pshufb	%xmm2, %xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 	pxor	%xmm3, \AAD_HASH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 	test	%r10, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 	jl	_partial_incomplete_1_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 	# GHASH computation for the last <16 Byte block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 	xor	%eax, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 	mov	%rax, PBlockLen(%arg2)	# block completed: reset PBlockLen to 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 	jmp	_dec_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) _partial_incomplete_1_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)	# still partial: PBlockLen += PLAIN_CYPH_LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) _dec_done_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 	movdqu	\AAD_HASH, AadHash(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) .else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 	pxor	%xmm1, %xmm9			# Plaintext XOR E(K, Yn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 	mov	\PLAIN_CYPH_LEN, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 	add	%r13, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 	# Set r10 to PBlockLen + PLAIN_CYPH_LEN - 16 (negative if the block is not filled)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 	sub	$16, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 	# Determine if partial block is not being filled and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 	# shift mask accordingly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 	jge	_no_extra_mask_2_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 	sub	%r10, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) _no_extra_mask_2_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 	movdqu	ALL_F-SHIFT_MASK(%r12), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) 	# get the appropriate mask to mask out bottom r13 bytes of xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723) 	pand	%xmm1, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) 	movdqa	SHUF_MASK(%rip), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) 	pshufb	%xmm1, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 	pshufb	%xmm2, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 	pxor	%xmm9, \AAD_HASH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 	test	%r10, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	jl	_partial_incomplete_2_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	# GHASH computation for the last <16 Byte block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 	GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	xor	%eax, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 	mov	%rax, PBlockLen(%arg2)	# block completed: reset PBlockLen to 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 	jmp	_encode_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) _partial_incomplete_2_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 	add	\PLAIN_CYPH_LEN, PBlockLen(%arg2)	# still partial: PBlockLen += PLAIN_CYPH_LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) _encode_done_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 	movdqu	\AAD_HASH, AadHash(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) 	movdqa	SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 	# shuffle xmm9 back to output as ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) 	pshufb	%xmm10, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) 	pshufb	%xmm2, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) 	# output encrypted Bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 	test	%r10, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) 	jl	_partial_fill_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752) 	mov	%r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753) 	mov	$16, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754) 	# Set r13 to be the number of bytes to write out (16 - old PBlockLen)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755) 	sub	%r12, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756) 	jmp	_count_set_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757) _partial_fill_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758) 	mov	\PLAIN_CYPH_LEN, %r13	# block not filled: write PLAIN_CYPH_LEN bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759) _count_set_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760) 	movdqa	%xmm9, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) 	movq	%xmm0, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) 	cmp	$8, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	jle	_less_than_8_bytes_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	mov	%rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)	# more than 8 bytes: store the low 8 at once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 	add	$8, \DATA_OFFSET
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	psrldq	$8, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 	movq	%xmm0, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 	sub	$8, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) _less_than_8_bytes_left_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 	movb	%al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1)	# store the remaining bytes one at a time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 	add	$1, \DATA_OFFSET
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 	shr	$8, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	sub	$1, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) 	jne	_less_than_8_bytes_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) _partial_block_done_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) .endm # PARTIAL_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) * if a = number of total plaintext bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) * b = floor(a/16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) * num_initial_blocks = b mod 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) * encrypt the initial num_initial_blocks blocks and apply ghash on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784) * the ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785) * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786) * are clobbered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787) * %arg1, %arg2, %arg3 are used as pointers only, not modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) .macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 	MOVADQ		SHUF_MASK(%rip), %xmm14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 	movdqu AadHash(%arg2), %xmm\i		    # XMM0 = Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 	# start AES for num_initial_blocks blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	movdqu CurCount(%arg2), \XMM0                # XMM0 = Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) .if (\i == 5) || (\i == 6) || (\i == 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	MOVADQ		ONE(%RIP),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 	MOVADQ		0(%arg1),\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) .irpc index, \i_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	paddd		\TMP1, \XMM0                 # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808)         movdqa     \XMM0, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) .else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 	MOVADQ		\XMM0, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 	pshufb	%xmm14, %xmm\index      # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 	pxor		\TMP2, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 	lea	0x10(%arg1),%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 	mov	keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 	shr	$2,%eax				# 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 	add	$5,%eax			      # 128->9, 192->11, 256->13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) aes_loop_initial_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 	MOVADQ	(%r10),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) .irpc	index, \i_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 	aesenc	\TMP1, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 	add	$16,%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 	sub	$1,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 	jnz	aes_loop_initial_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 	MOVADQ	(%r10), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) .irpc index, \i_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	aesenclast \TMP1, %xmm\index         # Last Round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) .irpc index, \i_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 	movdqu	   (%arg4 , %r11, 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 	pxor	   \TMP1, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 	movdqu	   %xmm\index, (%arg3 , %r11, 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	# write back plaintext/ciphertext for num_initial_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 	add	   $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	movdqa     \TMP1, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	pshufb	   %xmm14, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 		# prepare plaintext/ciphertext for GHASH computation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849)         # apply GHASH on num_initial_blocks blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) .if \i == 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852)         pxor       %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	GHASH_MUL  %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854)         pxor       %xmm6, %xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856)         pxor       %xmm7, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) .elseif \i == 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859)         pxor       %xmm6, %xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	GHASH_MUL  %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861)         pxor       %xmm7, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) .elseif \i == 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864)         pxor       %xmm7, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	GHASH_MUL  %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	cmp	   $64, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 	jl	_initial_blocks_done\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	# no need for precomputed values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) * Precomputations for HashKey parallel with encryption of first 4 blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) * HashKey_i_k holds the XOR of the low and high halves of HashKey_i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	MOVADQ	   ONE(%RIP),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 	paddd	   \TMP1, \XMM0              # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	MOVADQ	   \XMM0, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 	pshufb  %xmm14, \XMM1        # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) 	paddd	   \TMP1, \XMM0              # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	MOVADQ	   \XMM0, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	pshufb  %xmm14, \XMM2        # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	paddd	   \TMP1, \XMM0              # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 	MOVADQ	   \XMM0, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	pshufb %xmm14, \XMM3        # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	paddd	   \TMP1, \XMM0              # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	MOVADQ	   \XMM0, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 	pshufb %xmm14, \XMM4        # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	MOVADQ	   0(%arg1),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	pxor	   \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 	pxor	   \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 	pxor	   \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) 	pxor	   \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) .irpc index, 1234 # do 4 rounds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	movaps 0x10*\index(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	aesenc	   \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	aesenc	   \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	aesenc	   \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	aesenc	   \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) .irpc index, 56789 # do next 5 rounds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	movaps 0x10*\index(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	aesenc	   \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	aesenc	   \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 	aesenc	   \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 	aesenc	   \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 	lea	   0xa0(%arg1),%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 	mov	   keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 	shr	   $2,%eax			# 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 	sub	   $4,%eax			# 128->0, 192->2, 256->4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 	jz	   aes_loop_pre_done\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) aes_loop_pre_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	MOVADQ	   (%r10),\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) .irpc	index, 1234
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 	aesenc	   \TMP2, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 	add	   $16,%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	sub	   $1,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	jnz	   aes_loop_pre_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) aes_loop_pre_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 	MOVADQ	   (%r10), \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	aesenclast \TMP2, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 	aesenclast \TMP2, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	aesenclast \TMP2, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	aesenclast \TMP2, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 	movdqu	   16*0(%arg4 , %r11 , 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	pxor	   \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	movdqa     \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	movdqu	   16*1(%arg4 , %r11 , 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	pxor	   \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 	movdqa     \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	movdqu	   16*2(%arg4 , %r11 , 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 	pxor	   \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 	movdqa     \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	movdqu	   16*3(%arg4 , %r11 , 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 	pxor	   \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 	movdqa     \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) .else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 	movdqu     \XMM1, 16*0(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 	movdqu     \XMM2, 16*1(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 	movdqu     \XMM3, 16*2(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 	movdqu     \XMM4, 16*3(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 	add	   $64, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	pshufb %xmm14, \XMM1 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) 	pxor	   \XMMDst, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) # combine GHASHed value with the corresponding ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) 	pshufb %xmm14, \XMM2 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 	pshufb %xmm14, \XMM3 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) 	pshufb %xmm14, \XMM4 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) _initial_blocks_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) * Encrypt 4 blocks at a time (AES rounds interleaved with the GHASH multiplies)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) * GHASH the 4 previously encrypted ciphertext blocks passed in XMM1-XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) * %arg1, %arg2, %arg3, %arg4 are used as pointers only, not modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) * %r11 is the data offset value; %eax, %r10 and %xmm15 are clobbered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) .macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 	movdqa	  \XMM1, \XMM5	# keep the 4 blocks to be hashed in XMM5-XMM8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 	movdqa	  \XMM2, \XMM6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 	movdqa	  \XMM3, \XMM7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 	movdqa	  \XMM4, \XMM8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988)         movdqa    SHUF_MASK(%rip), %xmm15	# byte-swap mask used by pshufb below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989)         # multiply XMM5 * HashKey_4 (loaded into TMP5) using karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	movdqa	  \XMM5, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	pshufd	  $78, \XMM5, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	pxor	  \XMM5, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 	paddd     ONE(%rip), \XMM0		# INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 	movdqu	  HashKey_4(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 	pclmulqdq $0x11, \TMP5, \TMP4           # TMP4 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 	movdqa    \XMM0, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 	paddd     ONE(%rip), \XMM0		# INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	movdqa    \XMM0, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	paddd     ONE(%rip), \XMM0		# INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	movdqa    \XMM0, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 	paddd     ONE(%rip), \XMM0		# INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 	movdqa    \XMM0, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	pshufb %xmm15, \XMM1	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	pclmulqdq $0x00, \TMP5, \XMM5           # XMM5 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 	pshufb %xmm15, \XMM2	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	pshufb %xmm15, \XMM3	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 	pshufb %xmm15, \XMM4	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	pxor	  (%arg1), \XMM1	# Round 0 (initial key XOR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	pxor	  (%arg1), \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	pxor	  (%arg1), \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	pxor	  (%arg1), \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	movdqu	  HashKey_4_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) 	pclmulqdq $0x00, \TMP5, \TMP6       # TMP6 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 	movaps 0x10(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) 	aesenc	  \TMP1, \XMM1              # Round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) 	aesenc	  \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) 	aesenc	  \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) 	aesenc	  \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) 	movaps 0x20(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	aesenc	  \TMP1, \XMM1              # Round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	aesenc	  \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 	aesenc	  \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	aesenc	  \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 	movdqa	  \XMM6, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 	pshufd	  $78, \XMM6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	pxor	  \XMM6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) 	movdqu	  HashKey_3(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1 * b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) 	movaps 0x30(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) 	aesenc    \TMP3, \XMM1              # Round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	aesenc    \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 	aesenc    \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	aesenc    \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 	pclmulqdq $0x00, \TMP5, \XMM6       # XMM6 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 	movaps 0x40(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	aesenc	  \TMP3, \XMM1              # Round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	movdqu	  HashKey_3_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	movaps 0x50(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	aesenc	  \TMP3, \XMM1              # Round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	pxor	  \TMP1, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	pxor	  \XMM6, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	pxor	  \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	movdqa	  \XMM7, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	pshufd	  $78, \XMM7, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 	pxor	  \XMM7, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 	movdqu	  HashKey_2(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058)         # Multiply XMM7 * HashKey_2 (loaded into TMP5) using karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	movaps 0x60(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	aesenc	  \TMP3, \XMM1              # Round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	pclmulqdq $0x00, \TMP5, \XMM7       # XMM7 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	movaps 0x70(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	aesenc	  \TMP3, \XMM1              # Round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	movdqu	  HashKey_2_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 	pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	movaps 0x80(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	aesenc	  \TMP3, \XMM1              # Round 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	pxor	  \TMP1, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	pxor	  \XMM7, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	pxor	  \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084)         # Multiply XMM8 * HashKey
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085)         # XMM8 and TMP5 hold the values for the two operands
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	movdqa	  \XMM8, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	pshufd	  $78, \XMM8, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	pxor	  \XMM8, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	movdqu	  HashKey(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	pclmulqdq $0x11, \TMP5, \TMP1      # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	movaps 0x90(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	aesenc	  \TMP3, \XMM1             # Round 9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	pclmulqdq $0x00, \TMP5, \XMM8      # XMM8 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	lea	  0xa0(%arg1),%r10		# %r10 = address of the round key after round 9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 	mov	  keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 	shr	  $2,%eax			# 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 	sub	  $4,%eax			# 128->0, 192->2, 256->4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 	jz	  aes_loop_par_enc_done\@	# 128-bit key: no extra rounds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) aes_loop_par_enc\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	MOVADQ	  (%r10),\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) .irpc	index, 1234 # NOTE(review): names %xmm1-%xmm4 directly, not the XMM1-XMM4 macro arguments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	aesenc	  \TMP3, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	add	  $16,%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	sub	  $1,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	jnz	  aes_loop_par_enc\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) aes_loop_par_enc_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	MOVADQ	  (%r10), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	aesenclast \TMP3, \XMM1           # Last round (round 10 for 128-bit keys)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	aesenclast \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	aesenclast \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	aesenclast \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	movdqu    HashKey_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	pclmulqdq $0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	movdqu	  (%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 	movdqu	  16(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 	movdqu	  32(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 	movdqu	  48(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129)         movdqu    \XMM1, (%arg3,%r11,1)        # Write to the ciphertext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130)         movdqu    \XMM2, 16(%arg3,%r11,1)      # Write to the ciphertext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131)         movdqu    \XMM3, 32(%arg3,%r11,1)      # Write to the ciphertext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132)         movdqu    \XMM4, 48(%arg3,%r11,1)      # Write to the ciphertext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	pshufb %xmm15, \XMM1        # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	pshufb %xmm15, \XMM2	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 	pshufb %xmm15, \XMM3	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 	pshufb %xmm15, \XMM4	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	pxor	  \TMP4, \TMP1		# TMP1 = sum of all a1*b1 (high) products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 	pxor	  \XMM8, \XMM5		# XMM5 = sum of all a0*b0 (low) products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	pxor	  \TMP6, \TMP2		# TMP2 = sum of all (a1+a0)*(b1+b0) products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	pxor	  \TMP1, \TMP2		# karatsuba: remove the high part from the middle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	pxor	  \XMM5, \TMP2		# karatsuba: remove the low part from the middle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	movdqa	  \TMP2, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	pslldq	  $8, \TMP3                    # left shift TMP3 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	psrldq	  $8, \TMP2                    # right shift TMP2 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	pxor	  \TMP3, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	pxor	  \TMP2, \TMP1	  # accumulate the results in TMP1:XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149)         # first phase of reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	movdqa    \XMM5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 	movdqa    \XMM5, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 	movdqa    \XMM5, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	pslld     $31, \TMP2                   # packed left shift << 31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 	pslld     $30, \TMP3                   # packed left shift << 30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	pslld     $25, \TMP4                   # packed left shift << 25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) 	pxor      \TMP3, \TMP2	               # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 	pxor      \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) 	movdqa    \TMP2, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) 	psrldq    $4, \TMP5                    # right shift TMP5 1 DW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	pslldq    $12, \TMP2                   # left shift TMP2 3 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	pxor      \TMP2, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165)         # second phase of reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	movdqa    \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	movdqa    \XMM5,\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	movdqa    \XMM5,\TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	psrld     $1, \TMP2                    # packed right shift >> 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	psrld     $2, \TMP3                    # packed right shift >> 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	psrld     $7, \TMP4                    # packed right shift >> 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	pxor      \TMP3,\TMP2		       # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	pxor      \TMP4,\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	pxor      \TMP5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	pxor      \TMP2, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	pxor      \TMP1, \XMM5                 # result is in XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	pxor	  \XMM5, \XMM1		# fold the reduced GHASH value into the first new block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * decrypt 4 blocks at a time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) * ghash the 4 previously decrypted ciphertext blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * %arg1, %arg3, %arg4 are used as pointers only, not modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * %r11 is the data offset value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) .macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	movdqa	  \XMM1, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	movdqa	  \XMM2, \XMM6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	movdqa	  \XMM3, \XMM7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	movdqa	  \XMM4, \XMM8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196)         movdqa    SHUF_MASK(%rip), %xmm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)         # multiply TMP5 * HashKey using karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 	movdqa	  \XMM5, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	pshufd	  $78, \XMM5, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	pxor	  \XMM5, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 	paddd     ONE(%rip), \XMM0		# INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	movdqu	  HashKey_4(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	pclmulqdq $0x11, \TMP5, \TMP4           # TMP4 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	movdqa    \XMM0, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 	paddd     ONE(%rip), \XMM0		# INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 	movdqa    \XMM0, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	paddd     ONE(%rip), \XMM0		# INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	movdqa    \XMM0, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 	paddd     ONE(%rip), \XMM0		# INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 	movdqa    \XMM0, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	pshufb %xmm15, \XMM1	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 	pclmulqdq $0x00, \TMP5, \XMM5           # XMM5 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	pshufb %xmm15, \XMM2	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	pshufb %xmm15, \XMM3	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	pshufb %xmm15, \XMM4	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	pxor	  (%arg1), \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	pxor	  (%arg1), \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	pxor	  (%arg1), \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	pxor	  (%arg1), \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	movdqu	  HashKey_4_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	pclmulqdq $0x00, \TMP5, \TMP6       # TMP6 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 	movaps 0x10(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	aesenc	  \TMP1, \XMM1              # Round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 	aesenc	  \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	aesenc	  \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 	aesenc	  \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 	movaps 0x20(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	aesenc	  \TMP1, \XMM1              # Round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	aesenc	  \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	aesenc	  \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	aesenc	  \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	movdqa	  \XMM6, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	pshufd	  $78, \XMM6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	pxor	  \XMM6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	movdqu	  HashKey_3(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1 * b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	movaps 0x30(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	aesenc    \TMP3, \XMM1              # Round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	aesenc    \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	aesenc    \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	aesenc    \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	pclmulqdq $0x00, \TMP5, \XMM6       # XMM6 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	movaps 0x40(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	aesenc	  \TMP3, \XMM1              # Round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	movdqu	  HashKey_3_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	movaps 0x50(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	aesenc	  \TMP3, \XMM1              # Round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	pxor	  \TMP1, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 	pxor	  \XMM6, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 	pxor	  \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	movdqa	  \XMM7, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 	pshufd	  $78, \XMM7, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	pxor	  \XMM7, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 	movdqu	  HashKey_2(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266)         # Multiply TMP5 * HashKey using karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	movaps 0x60(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	aesenc	  \TMP3, \XMM1              # Round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	pclmulqdq $0x00, \TMP5, \XMM7       # XMM7 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	movaps 0x70(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	aesenc	  \TMP3, \XMM1              # Round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	movdqu	  HashKey_2_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	pclmulqdq $0x00, \TMP5, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 	movaps 0x80(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	aesenc	  \TMP3, \XMM1              # Round 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	pxor	  \TMP1, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	pxor	  \XMM7, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 	pxor	  \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292)         # Multiply XMM8 * HashKey
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293)         # XMM8 and TMP5 hold the values for the two operands
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 	movdqa	  \XMM8, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) 	pshufd	  $78, \XMM8, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	pxor	  \XMM8, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	movdqu	  HashKey(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	pclmulqdq $0x11, \TMP5, \TMP1      # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	movaps 0x90(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	aesenc	  \TMP3, \XMM1             # Round 9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	aesenc	  \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	aesenc	  \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	aesenc	  \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	pclmulqdq $0x00, \TMP5, \XMM8      # XMM8 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	lea	  0xa0(%arg1),%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	mov	  keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	shr	  $2,%eax		        # 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 	sub	  $4,%eax			# 128->0, 192->2, 256->4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	jz	  aes_loop_par_dec_done\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) aes_loop_par_dec\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	MOVADQ	  (%r10),\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) .irpc	index, 1234
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	aesenc	  \TMP3, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	add	  $16,%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	sub	  $1,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 	jnz	  aes_loop_par_dec\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) aes_loop_par_dec_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	MOVADQ	  (%r10), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 	aesenclast \TMP3, \XMM1           # last round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	aesenclast \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	aesenclast \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	aesenclast \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	movdqu    HashKey_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	pclmulqdq $0x00, \TMP5, \TMP2          # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	movdqu	  (%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	pxor	  \TMP3, \XMM1                 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	movdqu	  \XMM1, (%arg3,%r11,1)        # Write to plaintext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	movdqa    \TMP3, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	movdqu	  16(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	pxor	  \TMP3, \XMM2                 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	movdqu	  \XMM2, 16(%arg3,%r11,1)      # Write to plaintext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 	movdqa    \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	movdqu	  32(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 	pxor	  \TMP3, \XMM3                 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 	movdqu	  \XMM3, 32(%arg3,%r11,1)      # Write to plaintext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 	movdqa    \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 	movdqu	  48(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 	pxor	  \TMP3, \XMM4                 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 	movdqu	  \XMM4, 48(%arg3,%r11,1)      # Write to plaintext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 	movdqa    \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) 	pshufb %xmm15, \XMM1        # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 	pshufb %xmm15, \XMM2	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	pshufb %xmm15, \XMM3	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 	pshufb %xmm15, \XMM4	# perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 	pxor	  \TMP4, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 	pxor	  \XMM8, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 	pxor	  \TMP6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 	pxor	  \TMP1, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	pxor	  \XMM5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 	movdqa	  \TMP2, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	pslldq	  $8, \TMP3                    # left shift TMP3 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 	psrldq	  $8, \TMP2                    # right shift TMP2 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	pxor	  \TMP3, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 	pxor	  \TMP2, \TMP1	  # accumulate the results in TMP1:XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)         # first phase of reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 	movdqa    \XMM5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	movdqa    \XMM5, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 	movdqa    \XMM5, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	pslld     $31, \TMP2                   # packed right shift << 31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	pslld     $30, \TMP3                   # packed right shift << 30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	pslld     $25, \TMP4                   # packed right shift << 25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 	pxor      \TMP3, \TMP2	               # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	pxor      \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	movdqa    \TMP2, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	psrldq    $4, \TMP5                    # right shift T5 1 DW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 	pslldq    $12, \TMP2                   # left shift T2 3 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	pxor      \TMP2, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377)         # second phase of reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 	movdqa    \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 	movdqa    \XMM5,\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	movdqa    \XMM5,\TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	psrld     $1, \TMP2                    # packed left shift >>1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	psrld     $2, \TMP3                    # packed left shift >>2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 	psrld     $7, \TMP4                    # packed left shift >>7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	pxor      \TMP3,\TMP2		       # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	pxor      \TMP4,\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	pxor      \TMP5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	pxor      \TMP2, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 	pxor      \TMP1, \XMM5                 # result is in TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	pxor	  \XMM5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) /* GHASH the last 4 ciphertext blocks: XMMDst = reduced (XMM1*HashKey_4 ^ XMM2*HashKey_3 ^ XMM3*HashKey_2 ^ XMM4*HashKey); overwrites XMM1-XMM4 and TMP1-TMP7. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) .macro	GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398)         # Multiply XMM1 * HashKey_4 (using Karatsuba)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 	movdqa	  \XMM1, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	pshufd	  $78, \XMM1, \TMP2         # TMP2 = XMM1 with its 64-bit halves swapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 	pxor	  \XMM1, \TMP2              # TMP2 = (a1+a0) in both halves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	movdqu	  HashKey_4(%arg2), \TMP5   # TMP5 = b, loaded from the HashKey_4 offset of arg2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	pclmulqdq $0x11, \TMP5, \TMP6       # TMP6 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 	pclmulqdq $0x00, \TMP5, \XMM1       # XMM1 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	movdqu	  HashKey_4_k(%arg2), \TMP4 # presumably the precomputed (b1+b0) of HashKey_4 - confirm where it is stored
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	movdqa	  \XMM1, \XMMDst            # XMMDst = low product a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	movdqa	  \TMP2, \XMM1              # result in TMP6 (high), XMMDst (low), XMM1 (middle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411)         # Multiply XMM2 * HashKey_3 (using Karatsuba)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 	movdqa	  \XMM2, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 	pshufd	  $78, \XMM2, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 	pxor	  \XMM2, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 	movdqu	  HashKey_3(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 	pclmulqdq $0x00, \TMP5, \XMM2       # XMM2 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 	movdqu	  HashKey_3_k(%arg2), \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 	pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 	pxor	  \TMP1, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 	pxor	  \XMM2, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 	pxor	  \TMP2, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) # results accumulated in TMP6 (high), XMMDst (low), XMM1 (middle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426)         # Multiply XMM3 * HashKey_2 (using Karatsuba)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 	movdqa	  \XMM3, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 	pshufd	  $78, \XMM3, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	pxor	  \XMM3, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 	movdqu	  HashKey_2(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	pclmulqdq $0x11, \TMP5, \TMP1       # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 	pclmulqdq $0x00, \TMP5, \XMM3       # XMM3 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 	movdqu	  HashKey_2_k(%arg2), \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 	pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	pxor	  \TMP1, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 	pxor	  \XMM3, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 	pxor	  \TMP2, \XMM1   # results accumulated in TMP6 (high), XMMDst (low), XMM1 (middle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440)         # Multiply XMM4 * HashKey (using Karatsuba)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 	movdqa	  \XMM4, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 	pshufd	  $78, \XMM4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	pxor	  \XMM4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	movdqu	  HashKey(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	pclmulqdq $0x11, \TMP5, \TMP1	    # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 	pclmulqdq $0x00, \TMP5, \XMM4       # XMM4 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) 	movdqu	  HashKey_k(%arg2), \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 	pclmulqdq $0x00, \TMP4, \TMP2       # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) 	pxor	  \TMP1, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) 	pxor	  \XMM4, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	pxor	  \XMM1, \TMP2              # TMP2 = sum of all four middle products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) 	pxor	  \TMP6, \TMP2              # fold in the accumulated high products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	pxor	  \XMMDst, \TMP2            # fold in the accumulated low products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 	# middle section of the temp results combined as in karatsuba algorithm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 	movdqa	  \TMP2, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 	pslldq	  $8, \TMP4                 # left shift TMP4 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 	psrldq	  $8, \TMP2                 # right shift TMP2 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 	pxor	  \TMP4, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	pxor	  \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) # TMP6:XMMDst holds the result of the accumulated carry-less multiplications
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	# first phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	movdqa    \XMMDst, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 	movdqa    \XMMDst, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 	movdqa    \XMMDst, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) # move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 	pslld     $31, \TMP2                # packed left shift << 31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 	pslld     $30, \TMP3                # packed left shift << 30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 	pslld     $25, \TMP4                # packed left shift << 25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	pxor      \TMP3, \TMP2              # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 	pxor      \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 	movdqa    \TMP2, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) 	psrldq    $4, \TMP7                 # right shift TMP7 1 DW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 	pslldq    $12, \TMP2                # left shift TMP2 3 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) 	pxor      \TMP2, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476)         # second phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 	movdqa    \XMMDst, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	# make 3 copies of XMMDst for doing 3 shift operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	movdqa    \XMMDst, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 	movdqa    \XMMDst, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 	psrld     $1, \TMP2                 # packed right shift >> 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 	psrld     $2, \TMP3                 # packed right shift >> 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 	psrld     $7, \TMP4                 # packed right shift >> 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 	pxor      \TMP3, \TMP2              # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	pxor      \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	pxor      \TMP7, \TMP2              # fold in the DW carried over from the first phase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 	pxor      \TMP2, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 	pxor      \TMP6, \XMMDst            # reduced result is in XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) /* Encryption of a single block: AES-encrypts XMM0 in place with the key schedule at %arg1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) * uses eax & r10 (round counter and round-key pointer); TMP1 receives each round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) .macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	pxor		(%arg1), \XMM0		# xor in the first 16 bytes of the key schedule
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	mov		keysize,%eax		# presumably the key length in bytes (16/24/32) - confirm at the keysize definition
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	shr		$2,%eax			# 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	add		$5,%eax			# 128->9, 192->11, 256->13 = number of aesenc rounds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	lea		16(%arg1), %r10	  # r10 = address of the second round key (the first was xored in above)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) _esb_loop_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	MOVADQ		(%r10),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	aesenc		\TMP1,\XMM0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	add		$16,%r10		# advance to the next 16-byte round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	sub		$1,%eax			# one aesenc per remaining round in the counter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	jnz		_esb_loop_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	MOVADQ		(%r10),\TMP1		# r10 now points at the last round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	aesenclast	\TMP1,\XMM0		# final round; encrypted block is left in XMM0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) * void aesni_gcm_dec(void *aes_ctx,    // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) *                   struct gcm_context_data *data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) *                                      // Context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) *                   u8 *out,           // Plaintext output. Decrypt in-place is allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) *                   const u8 *in,      // Ciphertext input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) *                   u64 plaintext_len, // Length of data in bytes for decryption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) *                   u8 *iv,            // Pre-counter block j0: 4 byte salt (from Security Association)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) *                                      // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) *                                      // concatenated with 0x00000001. 16-byte aligned pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) *                   u8 *hash_subkey,   // H, the Hash sub key input. Data starts on a 16-byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) *                   const u8 *aad,     // Additional Authentication Data (AAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) *                   u64 aad_len,       // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) *                   u8  *auth_tag,     // Authenticated Tag output. The driver will compare this to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) *                                      // given authentication tag and only return the plaintext if they match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) *                   u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) *                                      // (most likely), 12 or 8.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) * Assumptions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) * keys:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) *       keys are pre-expanded and aligned to 16 bytes. we are using the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) *       set of 11, 13 or 15 round keys (128/192/256-bit key) in the data structure void *aes_ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * iv:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) *       0                   1                   2                   3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) *       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) *       |                             Salt  (From the SA)               |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) *       |                     Initialization Vector                     |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) *       |         (This is the sequence number from IPSec header)       |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) *       |                              0x1                              |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) * AAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) *       AAD padded to 128 bits with 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) *       for example, assume AAD is a u32 vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) *       if AAD is 8 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) *       AAD[2] = {A0, A1};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) *       padded AAD in xmm register = {A1 A0 0 0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) *       0                   1                   2                   3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) *       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) *       |                               SPI (A1)                        |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) *       |                     32-bit Sequence Number (A0)               |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) *       |                              0x0                              |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) *                                       AAD Format with 32-bit Sequence Number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) *       if AAD is 12 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) *       AAD[3] = {A0, A1, A2};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) *       padded AAD in xmm register = {A2 A1 A0 0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) *       0                   1                   2                   3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) *       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) *       |                               SPI (A2)                        |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) *       |                 64-bit Extended Sequence Number {A1,A0}       |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) *       |                                                               |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) *       |                              0x0                              |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) *                        AAD Format with 64-bit Extended Sequence Number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) * poly = x^128 + x^127 + x^126 + x^121 + 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) *****************************************************************************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) SYM_FUNC_START(aesni_gcm_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) 	FUNC_SAVE				# macro defined elsewhere; presumably saves caller state - confirm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) 	GCM_INIT %arg6, arg7, arg8, arg9	# parameters 6-9 of the prototype above: iv, hash_subkey, aad, aad_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) 	GCM_ENC_DEC dec				# shared encrypt/decrypt macro, invoked with the dec operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) 	GCM_COMPLETE arg10, arg11		# parameters 10-11 of the prototype above: auth_tag, auth_tag_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 	FUNC_RESTORE				# counterpart of FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) SYM_FUNC_END(aesni_gcm_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) * void aesni_gcm_enc(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) *                    struct gcm_context_data *data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) *                                        // Context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) *                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) *                    const u8 *in,       // Plaintext input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) *                    u64 plaintext_len,  // Length of data in bytes for encryption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) *                    u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) *                                        // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) *                                        // concatenated with 0x00000001. 16-byte aligned pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) *                    u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) *                    const u8 *aad,      // Additional Authentication Data (AAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) *                    u64 aad_len,        // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) *                    u8 *auth_tag,       // Authenticated Tag output.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) *                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) *                                        // 12 or 8.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) * Assumptions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) * keys:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) *       keys are pre-expanded and aligned to 16 bytes. we are using the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) *       first set of 11, 13 or 15 round keys (128/192/256-bit key) in the data structure void *aes_ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) * iv:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) *       0                   1                   2                   3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) *       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) *       |                             Salt  (From the SA)               |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) *       |                     Initialization Vector                     |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) *       |         (This is the sequence number from IPSec header)       |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) *       |                              0x1                              |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) * AAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) *       AAD padded to 128 bits with 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) *       for example, assume AAD is a u32 vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) *       if AAD is 8 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) *       AAD[3] = {A0, A1};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) *       padded AAD in xmm register = {A1 A0 0 0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) *       0                   1                   2                   3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) *       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) *       |                               SPI (A1)                        |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) *       |                     32-bit Sequence Number (A0)               |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) *       |                              0x0                              |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) *                                 AAD Format with 32-bit Sequence Number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) *       if AAD is 12 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) *       AAD[3] = {A0, A1, A2};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) *       padded AAD in xmm register = {A2 A1 A0 0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) *       0                   1                   2                   3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) *       0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) *       |                               SPI (A2)                        |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) *       |                 64-bit Extended Sequence Number {A1,A0}       |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) *       |                                                               |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) *       |                              0x0                              |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) *       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) *                         AAD Format with 64-bit Extended Sequence Number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) * poly = x^128 + x^127 + x^126 + x^121 + 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) ***************************************************************************/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) SYM_FUNC_START(aesni_gcm_enc)	/* runs GCM_INIT, GCM_ENC_DEC enc and GCM_COMPLETE in a single call */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 	FUNC_SAVE	/* macro defined outside this chunk; paired with FUNC_RESTORE below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 	GCM_INIT %arg6, arg7, arg8, arg9	/* NOTE(review): presumably iv, hash_subkey, aad, aad_len - confirm against the full prototype */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 	GCM_ENC_DEC enc	/* bulk pass in the encrypt direction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 	GCM_COMPLETE arg10, arg11	/* NOTE(review): presumably auth_tag, auth_tag_len (the last two prototype parameters) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 	FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) SYM_FUNC_END(aesni_gcm_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) * void aesni_gcm_init(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) *                     struct gcm_context_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) *                                         // context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) *                     u8 *iv,             // Pre-counter block j0: 4 byte salt (from Security Association)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) *                                         // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) *                                         // concatenated with 0x00000001. 16-byte aligned pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) *                     u8 *hash_subkey,    // H, the Hash sub key input. Data starts on a 16-byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) *                     const u8 *aad,      // Additional Authentication Data (AAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) *                     u64 aad_len)        // Length of AAD in bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) SYM_FUNC_START(aesni_gcm_init)	/* first step of the split API: only GCM_INIT is run here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	FUNC_SAVE	/* macro defined outside this chunk; paired with FUNC_RESTORE below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 	GCM_INIT %arg3, %arg4,%arg5, %arg6	/* arguments 3-6 of the prototype above: iv, hash_subkey, aad, aad_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 	FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) SYM_FUNC_END(aesni_gcm_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) * void aesni_gcm_enc_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) *                    struct gcm_context_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) *                                        // context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) *                    u8 *out,            // Ciphertext output. Encrypt in-place is allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) *                    const u8 *in,       // Plaintext input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) *                    u64 plaintext_len,  // Length of data in bytes for encryption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) SYM_FUNC_START(aesni_gcm_enc_update)	/* split API: only the GCM_ENC_DEC pass, encrypt direction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	FUNC_SAVE	/* macro defined outside this chunk; paired with FUNC_RESTORE below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	GCM_ENC_DEC enc	/* takes no explicit operands here; NOTE(review): presumably reads the prototype arguments directly - confirm in the macro */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) SYM_FUNC_END(aesni_gcm_enc_update)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) * void aesni_gcm_dec_update(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) *                    struct gcm_context_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) *                                        // context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) *                    u8 *out,            // Plaintext output. Decrypt in-place is allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) *                    const u8 *in,       // Ciphertext input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) *                    u64 plaintext_len,  // Length of data in bytes for decryption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) SYM_FUNC_START(aesni_gcm_dec_update)	/* split API: only the GCM_ENC_DEC pass, decrypt direction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 	FUNC_SAVE	/* macro defined outside this chunk; paired with FUNC_RESTORE below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 	GCM_ENC_DEC dec	/* same macro as the encrypt variant, invoked with the dec selector */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 	FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) SYM_FUNC_END(aesni_gcm_dec_update)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * void aesni_gcm_finalize(void *aes_ctx,      // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) *                    struct gcm_context_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) *                                        // context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) *                    u8 *auth_tag,       // Authenticated Tag output.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) *                    u64 auth_tag_len);  // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) *                                        // 12 or 8.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) SYM_FUNC_START(aesni_gcm_finalize)	/* last step of the split API: only GCM_COMPLETE is run here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	FUNC_SAVE	/* macro defined outside this chunk; paired with FUNC_RESTORE below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	GCM_COMPLETE %arg3 %arg4	/* arguments 3-4 of the prototype above: auth_tag, auth_tag_len (space-separated macro operands) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) SYM_FUNC_END(aesni_gcm_finalize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)	/* alias: the 128-bit schedule uses the same body as _key_expansion_256a */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) SYM_FUNC_START_LOCAL(_key_expansion_256a)	/* in: xmm0 = previous key, xmm1 = aeskeygenassist result, xmm4 low dword = 0; out: new key in xmm0 and at (TKEYP), TKEYP += 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 	pshufd $0b11111111, %xmm1, %xmm1	# broadcast dword 3 of xmm1 to all four lanes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 	shufps $0b00010000, %xmm0, %xmm4	# xmm4 (low to high) = {xmm4[0], xmm4[0], xmm0[1], xmm0[0]}; only xmm4[0] is read, and it is zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 	pxor %xmm4, %xmm0	# first step of the running XOR over the dwords of xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 	shufps $0b10001100, %xmm0, %xmm4	# xmm4 = {xmm4[0], xmm4[3], xmm0[0], xmm0[2]}; xmm4[0] stays zero for the next call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 	pxor %xmm4, %xmm0	# dword i of xmm0 is now the XOR of original dwords 0..i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 	pxor %xmm1, %xmm0	# fold in the broadcast aeskeygenassist word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 	movaps %xmm0, (TKEYP)	# store the new round key (aligned store)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 	add $0x10, TKEYP	# advance to the next 16-byte slot
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) SYM_FUNC_END(_key_expansion_256a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) SYM_FUNC_END_ALIAS(_key_expansion_128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) SYM_FUNC_START_LOCAL(_key_expansion_192a)	/* 192-bit step that writes 32 bytes: in xmm0/xmm2 = key state, xmm1 = aeskeygenassist result; TKEYP += 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 	pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of xmm1 to all four lanes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 	shufps $0b00010000, %xmm0, %xmm4	# xmm4 (low to high) = {xmm4[0], xmm4[0], xmm0[1], xmm0[0]}; xmm4[0] is zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	pxor %xmm4, %xmm0	# first step of the running XOR over the dwords of xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 	shufps $0b10001100, %xmm0, %xmm4	# xmm4 = {xmm4[0], xmm4[3], xmm0[0], xmm0[2]}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 	pxor %xmm4, %xmm0	# dword i of xmm0 is now the XOR of original dwords 0..i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 	pxor %xmm1, %xmm0	# fold in the broadcast aeskeygenassist word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 	movaps %xmm2, %xmm5	# xmm5 = copy of xmm2 used for the shifted XOR below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 	movaps %xmm2, %xmm6	# xmm6 keeps the incoming xmm2 for the first store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 	pslldq $4, %xmm5	# shift the copy left by one dword (4 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 	pshufd $0b11111111, %xmm0, %xmm3	# broadcast dword 3 of the updated xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 	pxor %xmm3, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 	pxor %xmm5, %xmm2	# xmm2 = xmm2 ^ broadcast(xmm0[3]) ^ (old xmm2 << 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 	movaps %xmm0, %xmm1	# xmm1 is free again; reuse it to assemble the second store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 	shufps $0b01000100, %xmm0, %xmm6	# xmm6 = {low qword of incoming xmm2, low qword of updated xmm0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 	movaps %xmm6, (TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) 	shufps $0b01001110, %xmm2, %xmm1	# xmm1 = {high qword of updated xmm0, low qword of updated xmm2}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 	movaps %xmm1, 0x10(TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) 	add $0x20, TKEYP	# two 16-byte slots were written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) SYM_FUNC_END(_key_expansion_192a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) SYM_FUNC_START_LOCAL(_key_expansion_192b)	/* 192-bit step that writes 16 bytes: same xmm0/xmm2 update as _key_expansion_192a, but only xmm0 is stored; TKEYP += 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 	pshufd $0b01010101, %xmm1, %xmm1	# broadcast dword 1 of xmm1 to all four lanes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	shufps $0b00010000, %xmm0, %xmm4	# xmm4 (low to high) = {xmm4[0], xmm4[0], xmm0[1], xmm0[0]}; xmm4[0] is zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 	pxor %xmm4, %xmm0	# first step of the running XOR over the dwords of xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 	shufps $0b10001100, %xmm0, %xmm4	# xmm4 = {xmm4[0], xmm4[3], xmm0[0], xmm0[2]}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 	pxor %xmm4, %xmm0	# dword i of xmm0 is now the XOR of original dwords 0..i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 	pxor %xmm1, %xmm0	# fold in the broadcast aeskeygenassist word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) 	movaps %xmm2, %xmm5	# xmm5 = copy of xmm2 used for the shifted XOR below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 	pslldq $4, %xmm5	# shift the copy left by one dword (4 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) 	pshufd $0b11111111, %xmm0, %xmm3	# broadcast dword 3 of the updated xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) 	pxor %xmm3, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) 	pxor %xmm5, %xmm2	# xmm2 = xmm2 ^ broadcast(xmm0[3]) ^ (old xmm2 << 32); kept in the register for the next step
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) 	movaps %xmm0, (TKEYP)	# only xmm0 is written out in this variant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) 	add $0x10, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) SYM_FUNC_END(_key_expansion_192b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) SYM_FUNC_START_LOCAL(_key_expansion_256b)	/* counterpart of _key_expansion_256a acting on xmm2: in xmm1 = aeskeygenassist result; out: new key in xmm2 and at (TKEYP), TKEYP += 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 	pshufd $0b10101010, %xmm1, %xmm1	# broadcast dword 2 of xmm1 to all four lanes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 	shufps $0b00010000, %xmm2, %xmm4	# xmm4 (low to high) = {xmm4[0], xmm4[0], xmm2[1], xmm2[0]}; xmm4[0] is zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 	pxor %xmm4, %xmm2	# first step of the running XOR over the dwords of xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 	shufps $0b10001100, %xmm2, %xmm4	# xmm4 = {xmm4[0], xmm4[3], xmm2[0], xmm2[2]}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 	pxor %xmm4, %xmm2	# dword i of xmm2 is now the XOR of original dwords 0..i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 	pxor %xmm1, %xmm2	# fold in the broadcast aeskeygenassist word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	movaps %xmm2, (TKEYP)	# store the new round key (aligned store)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 	add $0x10, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) SYM_FUNC_END(_key_expansion_256b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829)  * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830)  *                   unsigned int key_len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) SYM_FUNC_START(aesni_set_key)	/* expands in_key into ctx: encryption round keys from offset 0, aesimc-transformed keys in reverse order from offset 240, key_len at offset 480 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 	pushl KEYP	# 32-bit only: preserve KEYP, then fetch the three arguments from the stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 	movl (FRAME_OFFSET+8)(%esp), KEYP	# ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	movl (FRAME_OFFSET+12)(%esp), UKEYP	# in_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 	movl (FRAME_OFFSET+16)(%esp), %edx	# key_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 	movups (UKEYP), %xmm0		# user key (first 16 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 	movaps %xmm0, (KEYP)	# round key 0 is the first 16 key bytes, stored unchanged
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 	lea 0x10(KEYP), TKEYP		# key addr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 	movl %edx, 480(KEYP)	# record key_len at byte offset 480 of ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 	pxor %xmm4, %xmm4		# xmm4 is assumed 0 in _key_expansion_x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 	cmp $24, %dl	# dispatch on the low byte of key_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 	jb .Lenc_key128	# below 24: 128-bit schedule
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 	je .Lenc_key192	# exactly 24: 192-bit schedule; anything else falls through to the 256-bit schedule
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 	movups 0x10(UKEYP), %xmm2	# other user key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 	movaps %xmm2, (TKEYP)	# second 16 key bytes are stored unchanged as round key 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) 	add $0x10, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) 	call _key_expansion_256a	# updates and stores xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) 	aeskeygenassist $0x1, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) 	call _key_expansion_256b	# updates and stores xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) 	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) 	call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) 	aeskeygenassist $0x2, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	aeskeygenassist $0x4, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 	call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 	aeskeygenassist $0x8, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 	call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 	call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 	aeskeygenassist $0x10, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 	call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 	call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 	aeskeygenassist $0x20, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 	call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 	call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 	jmp .Ldec_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) .Lenc_key192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 	movq 0x10(UKEYP), %xmm2		# other user key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 	aeskeygenassist $0x1, %xmm2, %xmm1	# round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 	call _key_expansion_192a	# writes 32 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 	aeskeygenassist $0x2, %xmm2, %xmm1	# round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	call _key_expansion_192b	# writes 16 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 	aeskeygenassist $0x4, %xmm2, %xmm1	# round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 	call _key_expansion_192a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 	aeskeygenassist $0x8, %xmm2, %xmm1	# round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 	call _key_expansion_192b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 	aeskeygenassist $0x10, %xmm2, %xmm1	# round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 	call _key_expansion_192a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 	aeskeygenassist $0x20, %xmm2, %xmm1	# round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 	call _key_expansion_192b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 	aeskeygenassist $0x40, %xmm2, %xmm1	# round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 	call _key_expansion_192a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 	aeskeygenassist $0x80, %xmm2, %xmm1	# round 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 	call _key_expansion_192b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 	jmp .Ldec_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) .Lenc_key128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 	aeskeygenassist $0x1, %xmm0, %xmm1	# round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 	aeskeygenassist $0x2, %xmm0, %xmm1	# round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 	aeskeygenassist $0x4, %xmm0, %xmm1	# round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 	aeskeygenassist $0x8, %xmm0, %xmm1	# round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 	aeskeygenassist $0x10, %xmm0, %xmm1	# round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 	aeskeygenassist $0x20, %xmm0, %xmm1	# round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 	aeskeygenassist $0x40, %xmm0, %xmm1	# round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 	aeskeygenassist $0x80, %xmm0, %xmm1	# round 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 	aeskeygenassist $0x1b, %xmm0, %xmm1	# round 9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 	aeskeygenassist $0x36, %xmm0, %xmm1	# round 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 	call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) .Ldec_key:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 	sub $0x10, TKEYP	# TKEYP now points at the last encryption round key written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 	movaps (KEYP), %xmm0	# first encryption round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 	movaps (TKEYP), %xmm1	# last encryption round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 	movaps %xmm0, 240(TKEYP)	# the two end keys are copied swapped into the table at offset 240, without aesimc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 	movaps %xmm1, 240(KEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 	add $0x10, KEYP	# KEYP walks forward over the remaining encryption keys
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 	lea 240-16(TKEYP), UKEYP	# UKEYP walks backward through the table at offset 240
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) .Ldec_key_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 	movaps (KEYP), %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 	aesimc %xmm0, %xmm1	# InvMixColumns transform of the encryption round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 	movaps %xmm1, (UKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 	add $0x10, KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 	sub $0x10, UKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 	cmp TKEYP, KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 	jb .Ldec_key_loop	# repeat while KEYP is still below the last encryption key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 	xor AREG, AREG	# AREG = 0; NOTE(review): presumably the int return value (AREG is defined outside this chunk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 	popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) SYM_FUNC_END(aesni_set_key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944)  * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) SYM_FUNC_START(aesni_enc)	/* encrypts one 16-byte block from src into dst using the key schedule in ctx */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 	pushl KEYP	# 32-bit only: KEYP and KLEN are saved here and restored before returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 	pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 	movl (FRAME_OFFSET+20)(%esp), INP	# src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 	movl 480(KEYP), KLEN		# key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 	movups (INP), STATE		# input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	call _aesni_enc1	# all rounds happen in the helper; the result comes back in STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 	movups STATE, (OUTP)		# output
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 	popl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 	popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) SYM_FUNC_END(aesni_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968)  * _aesni_enc1:		internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969)  * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970)  *	KEYP:		key struct pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971)  *	KLEN:		key length in bytes (compared against 24 to select 10, 12 or 14 rounds)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972)  *	STATE:		initial state (input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973)  * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974)  *	STATE:		final state (output)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975)  * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976)  *	KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977)  *	TKEYP (T1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) SYM_FUNC_START_LOCAL(_aesni_enc1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 	movaps (KEYP), KEY		# key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 	mov KEYP, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 	pxor KEY, STATE		# round 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 	add $0x30, TKEYP	# TKEYP = KEYP + 0x30, the base used by the shared 128-bit tail
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) 	cmp $24, KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 	jb .Lenc128	# KLEN below 24: 10 rounds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) 	lea 0x20(TKEYP), TKEYP	# lea leaves the flags from the cmp intact for the je below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) 	je .Lenc192	# KLEN equal to 24: 12 rounds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	add $0x20, TKEYP	# otherwise 14 rounds; TKEYP = KEYP + 0x70
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 	movaps -0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 	movaps -0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) .Lenc192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 	movaps -0x40(TKEYP), KEY	# two extra rounds shared by the 192-bit and 256-bit paths
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) 	movaps -0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) .Lenc128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 	movaps -0x20(TKEYP), KEY	# common tail: nine aesenc rounds followed by aesenclast
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 	movaps -0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	movaps (TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 	movaps 0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 	movaps 0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) 	movaps 0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 	movaps 0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) 	movaps 0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) 	movaps 0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 	aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 	movaps 0x70(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 	aesenclast KEY, STATE	# final round uses aesenclast instead of aesenc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) SYM_FUNC_END(_aesni_enc1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025)  * _aesni_enc4:	internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026)  * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027)  *	KEYP:		key struct pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028)  *	KLEN:		key length in bytes (compared against 24 to select the round count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029)  *	STATE1:		initial state (input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030)  *	STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031)  *	STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032)  *	STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033)  * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034)  *	STATE1:		final state (output)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035)  *	STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036)  *	STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037)  *	STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)  * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039)  *	KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040)  *	TKEYP (T1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) SYM_FUNC_START_LOCAL(_aesni_enc4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 	movaps (KEYP), KEY		# key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 	mov KEYP, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 	pxor KEY, STATE1		# round 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 	pxor KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 	pxor KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) 	pxor KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) 	add $0x30, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 	cmp $24, KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) 	jb .L4enc128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) 	lea 0x20(TKEYP), TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 	je .L4enc192
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 	add $0x20, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 	movaps -0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 	movaps -0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) #.align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) .L4enc192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 	movaps -0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) 	movaps -0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) #.align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) .L4enc128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) 	movaps -0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 	movaps -0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 	movaps (TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 	movaps 0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 	movaps 0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 	movaps 0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 	movaps 0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 	movaps 0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 	movaps 0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 	aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 	aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 	aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) 	movaps 0x70(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 	aesenclast KEY, STATE1		# last round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 	aesenclast KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) 	aesenclast KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 	aesenclast KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) SYM_FUNC_END(_aesni_enc4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133)  * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134)  * Loads one block from src, runs _aesni_dec1 on it and stores the result to dst. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) SYM_FUNC_START(aesni_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 	pushl KEYP			# 32-bit build: save the registers used as KEYP and KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 	pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) 	movl (FRAME_OFFSET+12)(%esp), KEYP	# ctx (arguments are read from the stack)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 	movl (FRAME_OFFSET+16)(%esp), OUTP	# dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) 	movl (FRAME_OFFSET+20)(%esp), INP	# src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 	mov 480(KEYP), KLEN		# key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 	add $240, KEYP			# NOTE(review): presumably skips to the decryption round keys at ctx+240 - confirm against struct crypto_aes_ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 	movups (INP), STATE		# input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 	call _aesni_dec1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 	movups STATE, (OUTP)		# output
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) 	popl KLEN			# restore in reverse push order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 	popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) SYM_FUNC_END(aesni_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158)  * _aesni_dec1:		internal ABI; runs the decryption rounds on one state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159)  * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160)  *	KEYP:		key struct pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161)  *	KLEN:		key length (compared with 24 to choose the entry point)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162)  *	STATE:		initial state (input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163)  * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164)  *	STATE:		final state (output)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165)  * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166)  *	KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167)  *	TKEYP (T1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) SYM_FUNC_START_LOCAL(_aesni_dec1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 	movaps (KEYP), KEY		# first round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 	mov KEYP, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 	pxor KEY, STATE		# round 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 	add $0x30, TKEYP		# offset grows with key size so every path ends at 0x70(TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 	cmp $24, KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 	jb .Ldec128			# KLEN < 24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 	lea 0x20(TKEYP), TKEYP		# lea keeps the flags from cmp for the je below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 	je .Ldec192			# KLEN == 24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) 	add $0x20, TKEYP		# KLEN > 24: two extra rounds, then fall through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 	movaps -0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) 	movaps -0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) .Ldec192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) 	movaps -0x40(TKEYP), KEY	# KLEN == 24 path joins here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 	movaps -0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) .Ldec128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 	movaps -0x20(TKEYP), KEY	# KLEN < 24 path joins here: nine aesdec rounds plus aesdeclast
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 	movaps -0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 	movaps (TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 	movaps 0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 	movaps 0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 	movaps 0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 	movaps 0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 	movaps 0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 	movaps 0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 	aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 	movaps 0x70(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 	aesdeclast KEY, STATE		# last round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) SYM_FUNC_END(_aesni_dec1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215)  * _aesni_dec4:	internal ABI; decrypts four states with the same round keys
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216)  * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217)  *	KEYP:		key struct pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218)  *	KLEN:		key length (compared with 24 to choose the entry point)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219)  *	STATE1:		initial state (input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220)  *	STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221)  *	STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222)  *	STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223)  * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224)  *	STATE1:		final state (output)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)  *	STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226)  *	STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)  *	STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)  * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)  *	KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)  *	TKEYP (T1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) SYM_FUNC_START_LOCAL(_aesni_dec4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 	movaps (KEYP), KEY		# first round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 	mov KEYP, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 	pxor KEY, STATE1		# round 0: xor the key into every state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 	pxor KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) 	pxor KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 	pxor KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 	add $0x30, TKEYP		# offset grows with key size so every path ends at 0x70(TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 	cmp $24, KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 	jb .L4dec128			# KLEN < 24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 	lea 0x20(TKEYP), TKEYP		# lea keeps the flags from cmp for the je below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 	je .L4dec192			# KLEN == 24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 	add $0x20, TKEYP		# KLEN > 24: two extra rounds, then fall through
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 	movaps -0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 	movaps -0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) .L4dec192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 	movaps -0x40(TKEYP), KEY	# KLEN == 24 path joins here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 	movaps -0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) .L4dec128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 	movaps -0x20(TKEYP), KEY	# KLEN < 24 path joins here: nine aesdec rounds plus aesdeclast
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 	movaps -0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 	movaps (TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 	movaps 0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 	movaps 0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 	movaps 0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 	movaps 0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 	movaps 0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) 	movaps 0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 	aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) 	aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) 	aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 	aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 	movaps 0x70(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	aesdeclast KEY, STATE1		# last round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 	aesdeclast KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 	aesdeclast KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 	aesdeclast KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) SYM_FUNC_END(_aesni_dec4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323)  * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324)  *		      size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)  * Encrypts each whole 16-byte block of src into dst; a tail shorter than 16 bytes is skipped. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) SYM_FUNC_START(aesni_ecb_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 	pushl LEN			# 32-bit build: save registers, then read arguments from the stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 	pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 	pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 	movl (FRAME_OFFSET+24)(%esp), INP	# src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 	movl (FRAME_OFFSET+28)(%esp), LEN	# len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	test LEN, LEN		# check length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 	jz .Lecb_enc_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 	mov 480(KEYP), KLEN		# key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 	jb .Lecb_enc_ret		# less than one block: nothing to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 	cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) 	jb .Lecb_enc_loop1		# fewer than four blocks: go block by block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) .Lecb_enc_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) 	movups (INP), STATE1		# four blocks per iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 	movups 0x10(INP), STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 	movups 0x20(INP), STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 	movups 0x30(INP), STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 	call _aesni_enc4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 	movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 	movups STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 	movups STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 	movups STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 	sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 	add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 	add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 	cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 	jae .Lecb_enc_loop4		# unsigned test: LEN is a size_t byte count, as in the jb checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) 	jb .Lecb_enc_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) .Lecb_enc_loop1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 	movups (INP), STATE1		# NOTE(review): assumes STATE1 is the register _aesni_enc1 uses as STATE - defined outside this view
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) 	call _aesni_enc1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 	movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 	sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 	add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 	add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) 	jae .Lecb_enc_loop1		# unsigned test, same reason as above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) .Lecb_enc_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) 	popl KLEN			# restore in reverse push order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 	popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 	popl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) SYM_FUNC_END(aesni_ecb_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383)  * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384)  *		      size_t len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385)  * Decrypts each whole 16-byte block of src into dst; a tail shorter than 16 bytes is skipped. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) SYM_FUNC_START(aesni_ecb_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 	pushl LEN			# 32-bit build: save registers, then read arguments from the stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 	pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 	pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 	movl (FRAME_OFFSET+16)(%esp), KEYP	# ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 	movl (FRAME_OFFSET+20)(%esp), OUTP	# dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 	movl (FRAME_OFFSET+24)(%esp), INP	# src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) 	movl (FRAME_OFFSET+28)(%esp), LEN	# len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) 	test LEN, LEN			# check length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 	jz .Lecb_dec_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 	mov 480(KEYP), KLEN		# key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) 	add $240, KEYP			# NOTE(review): presumably skips to the decryption round keys at ctx+240 - confirm against struct crypto_aes_ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 	jb .Lecb_dec_ret		# less than one block: nothing to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 	cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 	jb .Lecb_dec_loop1		# fewer than four blocks: go block by block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) .Lecb_dec_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 	movups (INP), STATE1		# four blocks per iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 	movups 0x10(INP), STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 	movups 0x20(INP), STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 	movups 0x30(INP), STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 	call _aesni_dec4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 	movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 	movups STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 	movups STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 	movups STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 	sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 	add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 	add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 	cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 	jae .Lecb_dec_loop4		# unsigned test: LEN is a size_t byte count, as in the jb checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 	jb .Lecb_dec_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) .Lecb_dec_loop1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 	movups (INP), STATE1		# NOTE(review): assumes STATE1 is the register _aesni_dec1 uses as STATE - defined outside this view
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 	call _aesni_dec1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 	movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 	sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 	add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 	add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 	jae .Lecb_dec_loop1		# unsigned test, same reason as above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) .Lecb_dec_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 	popl KLEN			# restore in reverse push order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 	popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 	popl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) SYM_FUNC_END(aesni_ecb_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444)  * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445)  *		      size_t len, u8 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446)  * CBC-encrypts each whole 16-byte block of src into dst and stores the last output block back to iv. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) SYM_FUNC_START(aesni_cbc_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 	pushl IVP			# 32-bit build: save registers, then read arguments from the stack
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 	pushl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 	pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) 	pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 	movl (FRAME_OFFSET+28)(%esp), INP	# src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 	movl (FRAME_OFFSET+32)(%esp), LEN	# len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 	jb .Lcbc_enc_ret		# less than one block: nothing to do, iv is left untouched
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 	mov 480(KEYP), KLEN		# key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 	movups (IVP), STATE	# load iv as initial state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) .Lcbc_enc_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 	movups (INP), IN	# load input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 	pxor IN, STATE		# chain: xor input into the previous output block (or the iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) 	call _aesni_enc1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 	movups STATE, (OUTP)	# store output
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 	sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 	add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 	add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) 	jae .Lcbc_enc_loop	# unsigned test: LEN is a size_t byte count, as in the jb check
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 	movups STATE, (IVP)	# write the last output block back as the next iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) .Lcbc_enc_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) 	popl KLEN			# restore in reverse push order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 	popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 	popl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 	popl IVP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) SYM_FUNC_END(aesni_cbc_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488)  * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489)  *		      size_t len, u8 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) SYM_FUNC_START(aesni_cbc_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) 	pushl IVP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) 	pushl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) 	pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) 	pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) 	movl (FRAME_OFFSET+20)(%esp), KEYP	# ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) 	movl (FRAME_OFFSET+24)(%esp), OUTP	# dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) 	movl (FRAME_OFFSET+28)(%esp), INP	# src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) 	movl (FRAME_OFFSET+32)(%esp), LEN	# len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) 	movl (FRAME_OFFSET+36)(%esp), IVP	# iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) 	jb .Lcbc_dec_just_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) 	mov 480(KEYP), KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) 	add $240, KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) 	movups (IVP), IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) 	cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) 	jb .Lcbc_dec_loop1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) .Lcbc_dec_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) 	movups (INP), IN1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) 	movaps IN1, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) 	movups 0x10(INP), IN2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) 	movaps IN2, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) 	movups 0x20(INP), IN3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) 	movaps IN3, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) 	movups 0x30(INP), IN4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) 	movaps IN4, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) 	movups 0x20(INP), IN1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) 	movaps IN1, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) 	movups 0x30(INP), IN2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) 	movaps IN2, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) 	call _aesni_dec4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) 	pxor IV, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) 	pxor IN1, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) 	pxor IN2, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) 	pxor IN3, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) 	movaps IN4, IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) 	pxor IN1, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) 	movaps IN2, IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) 	movups (INP), IN1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) 	pxor IN1, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) 	movups 0x10(INP), IN2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) 	pxor IN2, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) 	movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) 	movups STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) 	movups STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) 	movups STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) 	sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) 	add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) 	add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) 	cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) 	jge .Lcbc_dec_loop4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) 	jb .Lcbc_dec_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) .Lcbc_dec_loop1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) 	movups (INP), IN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) 	movaps IN, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) 	call _aesni_dec1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) 	pxor IV, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) 	movups STATE, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) 	movaps IN, IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) 	sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) 	add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) 	add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) 	jge .Lcbc_dec_loop1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) .Lcbc_dec_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) 	movups IV, (IVP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) .Lcbc_dec_just_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) 	popl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) 	popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) 	popl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) 	popl IVP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) SYM_FUNC_END(aesni_cbc_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) .pushsection .rodata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) .align 16	# loaded with movaps in _aesni_inc_init, which needs 16-byte alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) .Lbswap_mask:	# pshufb control: result byte i = source byte 15-i (full byte reversal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) 	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) .popsection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588)  * _aesni_inc_init:	internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589)  *	Prepare the register state that _aesni_inc works on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590)  * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591)  *	IV:	counter block, in big endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592)  * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593)  *	CTR:	== IV, byte-reversed into little endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)  *	TCTR_LOW: == lower qword of CTR (scalar copy, used for carry detection)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595)  *	INC:	== 1 in the low qword, high qword zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596)  *	BSWAP_MASK == endian swapping mask, loaded from .Lbswap_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) SYM_FUNC_START_LOCAL(_aesni_inc_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) 	movaps .Lbswap_mask(%rip), BSWAP_MASK	# RIP-relative: this code is x86_64 only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) 	movaps IV, CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) 	pshufb BSWAP_MASK, CTR	# CTR = IV with its 16 bytes reversed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) 	mov $1, TCTR_LOW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) 	movq TCTR_LOW, INC	# INC = 1; movq into an xmm register zeroes the upper qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) 	movq CTR, TCTR_LOW	# scalar shadow of the low counter qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) SYM_FUNC_END(_aesni_inc_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609)  * _aesni_inc:		internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610)  *	Increase IV by 1 as a 128-bit value, IV is in big endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611)  * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612)  *	IV:	not read here; it is rebuilt from CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613)  *	CTR:	== IV, in little endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614)  *	TCTR_LOW: == lower qword of CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615)  *	INC:	== 1, in little endian (shifted temporarily, restored on exit)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616)  *	BSWAP_MASK == endian swapping mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617)  * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618)  *	IV:	Increased by 1, in big endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619)  * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620)  *	CTR:	== output IV, in little endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621)  *	TCTR_LOW: == lower qword of CTR (the scalar add also changes the flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) SYM_FUNC_START_LOCAL(_aesni_inc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) 	paddq INC, CTR	# low qword += 1; paddq does not carry between qwords
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) 	add $1, TCTR_LOW	# same add on the scalar copy, which does produce a carry flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) 	jnc .Linc_low	# low qword did not wrap: high qword stays as is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) 	pslldq $8, INC	# move the 1 from the low into the high qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) 	paddq INC, CTR	# apply the carry to the high qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) 	psrldq $8, INC	# restore INC to 1 in the low qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) .Linc_low:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) 	movaps CTR, IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) 	pshufb BSWAP_MASK, IV	# IV = new counter, byte-reversed back to big endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) SYM_FUNC_END(_aesni_inc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637)  * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638)  *		      size_t len, u8 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639)  * CTR mode over whole 16-byte blocks; the advanced counter is written to *iv. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) SYM_FUNC_START(aesni_ctr_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) 	jb .Lctr_enc_just_ret	# less than one block: nothing is done, *iv is left untouched
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) 	mov 480(KEYP), KLEN	# presumably the key length field of ctx - TODO confirm offset 480
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) 	movups (IVP), IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) 	call _aesni_inc_init	# derive CTR, TCTR_LOW, INC and BSWAP_MASK from IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) 	cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) 	jb .Lctr_enc_loop1	# fewer than four blocks: go straight to the one-block loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) .Lctr_enc_loop4:	# four blocks per iteration, entered only with LEN >= 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) 	movaps IV, STATE1	# STATEn = counter block n; IV is bumped after each copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) 	call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) 	movups (INP), IN1	# input loads are interleaved with the counter updates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) 	movaps IV, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) 	call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) 	movups 0x10(INP), IN2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) 	movaps IV, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) 	call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) 	movups 0x20(INP), IN3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) 	movaps IV, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) 	call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) 	movups 0x30(INP), IN4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) 	call _aesni_enc4	# encrypt the four counter blocks; results are used from STATE1..4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) 	pxor IN1, STATE1	# output block = encrypted counter ^ input block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) 	movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) 	pxor IN2, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) 	movups STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) 	pxor IN3, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) 	movups STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) 	pxor IN4, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) 	movups STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) 	sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) 	add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) 	add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) 	cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) 	jae .Lctr_enc_loop4	# len is a size_t: unsigned compare, consistent with the jb checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) 	jb .Lctr_enc_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) .Lctr_enc_loop1:	# one block per iteration, entered only with LEN >= 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) 	movaps IV, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) 	call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) 	movups (INP), IN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) 	call _aesni_enc1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) 	pxor IN, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) 	movups STATE, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) 	sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) 	add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) 	add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) 	cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) 	jae .Lctr_enc_loop1	# unsigned compare; a remainder below 16 bytes is not processed here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) .Lctr_enc_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) 	movups IV, (IVP)	# store the counter for the next block back to *iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) .Lctr_enc_just_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) SYM_FUNC_END(aesni_ctr_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700)  * _aesni_gf128mul_x_ble:		internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701)  *	Multiply in GF(2^128) for XTS IVs: paddq doubles both qwords of IV, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702)  *	the bits shifted out (bits 127 and 63, sign-extended into CTR before the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703)  *	shift) select which parts of GF128MUL_MASK are XORed back into IV.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704)  * input:	IV: current IV, GF128MUL_MASK == mask with 0x87 and 0x01
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705)  *	NOTE(review): mask layout is defined elsewhere; presumably 0x87 sits in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706)  *	dword 0 and 0x01 in dword 2, the rest zero - confirm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707)  * output:	IV: next IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708)  * changed:	CTR: == temporary value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) #define _aesni_gf128mul_x_ble() \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) 	pshufd $0x13, IV, CTR; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) 	paddq IV, IV; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) 	psrad $31, CTR; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) 	pand GF128MUL_MASK, CTR; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) 	pxor CTR, IV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718)  * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719)  *			  const u8 *src, unsigned int len, le128 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) SYM_FUNC_START(aesni_xts_encrypt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) 	/* XTS encryption, 64 bytes (four blocks) per loop iteration; IV holds the running tweak. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) 	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK	# RIP-relative: this code is x86_64 only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) 	movups (IVP), IV	# tweak for the first block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) 	/* NOTE(review): no length check or tail handling below; presumably len is a non-zero multiple of 64 - confirm against callers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) 	mov 480(KEYP), KLEN	# presumably the key length field of ctx - TODO confirm offset 480
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) .Lxts_enc_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) 	movdqa IV, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) 	movdqu 0x00(INP), INC	# INC is used purely as a scratch register in this function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) 	pxor INC, STATE1	# STATE1 = tweak ^ input block 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) 	movdqu IV, 0x00(OUTP)	# park the tweak in the output slot for the XOR after AES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) 	/* Blocks 1..3: advance the tweak, then repeat the same three steps. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) 	_aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) 	movdqa IV, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) 	movdqu 0x10(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) 	pxor INC, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) 	movdqu IV, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) 	_aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) 	movdqa IV, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) 	movdqu 0x20(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) 	pxor INC, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) 	movdqu IV, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) 	_aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) 	movdqa IV, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) 	movdqu 0x30(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) 	pxor INC, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) 	movdqu IV, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) 	call _aesni_enc4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) 	/* Second XOR: reload each parked tweak from dst, apply it, store the final block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) 	movdqu 0x00(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) 	pxor INC, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) 	movdqu STATE1, 0x00(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) 	movdqu 0x10(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) 	pxor INC, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) 	movdqu STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) 	movdqu 0x20(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) 	pxor INC, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) 	movdqu STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) 	movdqu 0x30(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) 	pxor INC, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) 	movdqu STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) 	/* Step the tweak once more so IV is ready for the block after this group. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) 	_aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) 	add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) 	add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) 	sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) 	ja .Lxts_enc_loop4	# taken only if LEN was above 64 before the sub (no borrow, not zero)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) 	movups IV, (IVP)	# write the next tweak back to *iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) SYM_FUNC_END(aesni_xts_encrypt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785)  * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786)  *			  const u8 *src, unsigned int len, le128 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) SYM_FUNC_START(aesni_xts_decrypt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) 	FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) 	/* XTS decryption, 64 bytes (four blocks) per loop iteration; IV holds the running tweak. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) 	movdqa .Lgf128mul_x_ble_mask(%rip), GF128MUL_MASK	# RIP-relative: this code is x86_64 only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) 	movups (IVP), IV	# tweak for the first block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) 	/* NOTE(review): no length check or tail handling below; presumably len is a non-zero multiple of 64 - confirm against callers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) 	mov 480(KEYP), KLEN	# presumably the key length field of ctx - TODO confirm offset 480
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) 	add $240, KEYP	# presumably skips to the decryption key schedule - TODO confirm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) .Lxts_dec_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) 	movdqa IV, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) 	movdqu 0x00(INP), INC	# INC is used purely as a scratch register in this function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) 	pxor INC, STATE1	# STATE1 = tweak ^ input block 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) 	movdqu IV, 0x00(OUTP)	# park the tweak in the output slot for the XOR after AES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) 	/* Blocks 1..3: advance the tweak, then repeat the same three steps. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) 	_aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) 	movdqa IV, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) 	movdqu 0x10(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) 	pxor INC, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) 	movdqu IV, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) 	_aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) 	movdqa IV, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) 	movdqu 0x20(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) 	pxor INC, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) 	movdqu IV, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) 	_aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) 	movdqa IV, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) 	movdqu 0x30(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) 	pxor INC, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) 	movdqu IV, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) 	call _aesni_dec4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) 	/* Second XOR: reload each parked tweak from dst, apply it, store the final block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) 	movdqu 0x00(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) 	pxor INC, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) 	movdqu STATE1, 0x00(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) 	movdqu 0x10(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) 	pxor INC, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) 	movdqu STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) 	movdqu 0x20(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) 	pxor INC, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) 	movdqu STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) 	movdqu 0x30(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) 	pxor INC, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) 	movdqu STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) 	/* Step the tweak once more so IV is ready for the block after this group. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) 	_aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) 	add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) 	add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) 	sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) 	ja .Lxts_dec_loop4	# taken only if LEN was above 64 before the sub (no borrow, not zero)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) 	movups IV, (IVP)	# write the next tweak back to *iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) 	FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) 	ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) SYM_FUNC_END(aesni_xts_decrypt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) #endif