^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Implement AES algorithm in Intel AES-NI instructions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * The white paper of AES-NI instructions can be downloaded from:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * http://softwarecommunity.intel.com/isn/downloads/intelavx/AES-Instructions-Set_WP.pdf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * Copyright (C) 2008, Intel Corp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Author: Huang Ying <ying.huang@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Vinodh Gopal <vinodh.gopal@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * Kahraman Akdemir
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * Added RFC4106 AES-GCM support for 128-bit keys under the AEAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * interface for 64-bit kernels.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * Authors: Erdinc Ozturk (erdinc.ozturk@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * Aidan O'Mahony (aidan.o.mahony@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * Adrian Hoban <adrian.hoban@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * James Guilford (james.guilford@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * Gabriele Paoloni <gabriele.paoloni@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * Tadeusz Struk (tadeusz.struk@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * Wajdi Feghali (wajdi.k.feghali@intel.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * Copyright (c) 2010, Intel Corporation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * Ported x86_64 version to x86:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * Author: Mathias Krause <minipli@googlemail.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #include <asm/frame.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #include <asm/nospec-branch.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * The following macros are used to move an (un)aligned 16 byte value to/from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * an XMM register. This can be done for either FP or integer values, for FP use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * movaps (move aligned packed single) or integer use movdqa (move double quad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * aligned). It doesn't make a performance difference which instruction is used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * since Nehalem (original Core i7) was released. However, the movaps is a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * shorter, so that is the one we'll use for now. (same for unaligned).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) */
/* MOVADQ: 16-byte move for aligned operands; MOVUDQ: 16-byte move for unaligned operands. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define MOVADQ movaps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #define MOVUDQ movups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) # constants in mergeable sections, linker can reorder and merge
# Each constant below is one 16-byte value placed in a dedicated
# .rodata.cst16.<name> section and aligned to 16 bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) .section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) .Lgf128mul_x_ble_mask:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) .octa 0x00000000000000010000000000000087
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) .section .rodata.cst16.POLY, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) POLY: .octa 0xC2000000000000000000000000000001 # XORed into HashKey<<1 by PRECOMPUTE when a bit was shifted out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) .section .rodata.cst16.TWOONE, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) TWOONE: .octa 0x00000001000000000000000000000001 # compare pattern used by PRECOMPUTE to build the POLY select mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) .section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F # pshufb control that reverses the 16 bytes of a register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) .section .rodata.cst16.MASK1, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) MASK1: .octa 0x0000000000000000ffffffffffffffff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) .section .rodata.cst16.MASK2, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) MASK2: .octa 0xffffffffffffffff0000000000000000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) .section .rodata.cst16.ONE, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) ONE: .octa 0x00000000000000000000000000000001 # counter increment added with paddd in GCM_ENC_DEC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) .section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) .align 16
# NOTE(review): the literal below has 31 hex digits, so its top nibble is
# zero; confirm that this value is intended.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) .section .rodata.cst16.dec, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) dec: .octa 0x1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) .section .rodata.cst16.enc, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) enc: .octa 0x2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) # order of these constants should not change.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) # more specifically, ALL_F should follow SHIFT_MASK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) # and zero should follow ALL_F
# Reason: GCM_ENC_DEC loads 16 bytes starting at SHIFT_MASK+16-r13 and at
# ALL_F+16-r13 (r13 = 1..15), so each load straddles two adjacent constants.
# They therefore live in plain .rodata rather than in mergeable sections.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) .section .rodata, "a", @progbits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) ALL_F: .octa 0xffffffffffffffffffffffffffffffff
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) .octa 0x00000000000000000000000000000000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
/*
 * STACK_OFFSET is the number of bytes pushed by FUNC_SAVE (three 8-byte
 * registers). arg7..arg11 below are read from the stack relative to %rsp
 * after those pushes; the additional +8 presumably skips the return
 * address - TODO confirm against the callers.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) #define STACK_OFFSET 8*3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
/*
 * Byte offsets of the fields of the GCM context block. The macros in this
 * file address them as displacements from %arg2, e.g. AadHash(%arg2).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) #define AadHash 16*0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) #define AadLen 16*1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) #define InLen (16*1)+8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) #define PBlockEncKey 16*2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) #define OrigIV 16*3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) #define CurCount 16*4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) #define PBlockLen 16*5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) #define HashKey 16*6 // store HashKey <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) #define HashKey_2 16*7 // store HashKey^2 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) #define HashKey_3 16*8 // store HashKey^3 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) #define HashKey_4 16*9 // store HashKey^4 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) #define HashKey_k 16*10 // store XOR of High 64 bits and Low 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) // bits of HashKey <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) //(for Karatsuba purposes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) #define HashKey_2_k 16*11 // store XOR of High 64 bits and Low 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) // bits of HashKey^2 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) // (for Karatsuba purposes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) #define HashKey_3_k 16*12 // store XOR of High 64 bits and Low 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) // bits of HashKey^3 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) // (for Karatsuba purposes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) #define HashKey_4_k 16*13 // store XOR of High 64 bits and Low 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) // bits of HashKey^4 <<1 mod poly here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) // (for Karatsuba purposes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116)
/*
 * Argument aliases. arg1..arg6 are bare register names (no % sign), so they
 * are written as %arg1 etc. at the use site; arg7..arg11 are complete
 * memory operands on the stack.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) #define arg1 rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) #define arg2 rsi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) #define arg3 rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) #define arg4 rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) #define arg5 r8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) #define arg6 r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) #define arg7 STACK_OFFSET+8(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) #define arg8 STACK_OFFSET+16(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) #define arg9 STACK_OFFSET+24(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) #define arg10 STACK_OFFSET+32(%rsp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) #define arg11 STACK_OFFSET+40(%rsp)
/*
 * keysize: memory operand at byte offset 480 (2*15*16) from %arg1.
 * Presumably the key-length field stored after two 15-entry round-key
 * arrays - TODO confirm against the key context layout.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) #define keysize 2*15*16(%arg1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
/*
 * Symbolic names for the registers used by the AES routines.
 * STATE and IN are shorthand for STATE1 and IN1.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) #define STATE1 %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) #define STATE2 %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) #define STATE3 %xmm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) #define STATE4 %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) #define STATE STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) #define IN1 %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) #define IN2 %xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) #define IN3 %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) #define IN4 %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) #define IN IN1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) #define KEY %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) #define IV %xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) #define BSWAP_MASK %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) #define CTR %xmm11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) #define INC %xmm12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148)
/* GF128MUL_MASK names the same register (%xmm10) as BSWAP_MASK above. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) #define GF128MUL_MASK %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
/*
 * General-purpose register aliases. In the 64-bit branch KEYP, OUTP, INP,
 * LEN, IVP and KLEN name rdi, rsi, rdx, rcx, r8 and r9d. In the 32-bit
 * branch OUTP shares AREG (%eax) and T2/TCTR_LOW are not defined.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) #define AREG %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) #define KEYP %rdi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) #define OUTP %rsi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) #define UKEYP OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) #define INP %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) #define LEN %rcx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) #define IVP %r8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) #define KLEN %r9d
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) #define T1 %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) #define TKEYP T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) #define T2 %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) #define TCTR_LOW T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) #define AREG %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) #define KEYP %edi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) #define OUTP AREG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) #define UKEYP OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) #define INP %edx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) #define LEN %esi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) #define IVP %ebp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) #define KLEN %ebx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) #define T1 %ecx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) #define TKEYP T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176)
# FUNC_SAVE pushes r12, r13 and r14 (24 bytes, the value of STACK_OFFSET).
# Takes no arguments. Must be paired with FUNC_RESTORE, which pops the same
# registers in reverse order.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) .macro FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) push %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) push %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) push %r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) # states of %xmm registers %xmm6:%xmm15 not saved
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) # all %xmm registers are clobbered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) #
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
# FUNC_RESTORE pops r14, r13 and r12, the reverse of the FUNC_SAVE push
# order. Takes no arguments; no xmm state is restored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) .macro FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) pop %r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) pop %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) pop %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) # Precompute hashkeys.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) # Input: Hash subkey.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) # Output: HashKeys stored in gcm_context_data. Only needs to be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) # once per key.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) # clobbers r12, and tmp xmm registers.
# Parameters:
#   SUBKEY      operand holding the address of the 16-byte hash subkey
#   TMP1..TMP7  scratch xmm registers
# Results are written relative to %arg2: HashKey, HashKey_2..HashKey_4 and
# the matching *_k values (high qword XOR low qword of each power).
# GHASH_MUL is defined outside this excerpt; according to the comments in
# this macro it leaves the product in its first operand.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) .macro PRECOMPUTE SUBKEY TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) mov \SUBKEY, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) movdqu (%r12), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) movdqa SHUF_MASK(%rip), \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) pshufb \TMP2, \TMP3 # reverse the byte order of the subkey
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) # precompute HashKey<<1 mod poly from the HashKey (required for GHASH)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)
# 128-bit shift left by one bit, built from two 64-bit lane shifts plus the
# carried top bits of each lane.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) movdqa \TMP3, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) psllq $1, \TMP3 # shift each qword left by one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) psrlq $63, \TMP2 # keep only the top bit of each qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) movdqa \TMP2, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) pslldq $8, \TMP2 # carry of the low qword moves into the high qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) psrldq $8, \TMP1 # bit shifted out of bit 127 ends up in bit 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) por \TMP2, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) # reduce HashKey<<1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216)
# Build an all-ones mask when the shifted-out bit was set (compare against
# TWOONE), AND it with POLY, and XOR the result into the shifted key.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) pshufd $0x24, \TMP1, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) pcmpeqd TWOONE(%rip), \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) pand POLY(%rip), \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) pxor \TMP2, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) movdqu \TMP3, HashKey(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) movdqa \TMP3, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) pshufd $78, \TMP3, \TMP1 # swap the high and low qwords
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) pxor \TMP3, \TMP1 # high qword XOR low qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) movdqu \TMP1, HashKey_k(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) # TMP5 = HashKey^2<<1 (mod poly)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) movdqu \TMP5, HashKey_2(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) # HashKey_2 = HashKey^2<<1 (mod poly)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) pshufd $78, \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) pxor \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) movdqu \TMP1, HashKey_2_k(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) # TMP5 = HashKey^3<<1 (mod poly)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) movdqu \TMP5, HashKey_3(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) pshufd $78, \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) pxor \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) movdqu \TMP1, HashKey_3_k(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) GHASH_MUL \TMP5, \TMP3, \TMP1, \TMP2, \TMP4, \TMP6, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) # TMP5 = HashKey^4<<1 (mod poly)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) movdqu \TMP5, HashKey_4(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) pshufd $78, \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) pxor \TMP5, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) movdqu \TMP1, HashKey_4_k(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) # GCM_INIT initializes a gcm_context struct to prepare for encoding/decoding.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) # Clobbers rax, r10-r13 and xmm0-xmm6, %xmm13
# NOTE(review): %xmm7 is also handed to PRECOMPUTE as a scratch register
# below, so it is presumably clobbered as well - confirm and extend the list.
# Parameters:
#   Iv      operand holding the address of the 16-byte initial counter block
#   SUBKEY  operand holding the address of the hash subkey (see PRECOMPUTE)
#   AAD     passed through to CALC_AAD_HASH (defined outside this excerpt)
#   AADLEN  AAD length; stored in the AadLen field and passed on as well
# The context block is addressed through %arg2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) .macro GCM_INIT Iv SUBKEY AAD AADLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) mov \AADLEN, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) mov %r11, AadLen(%arg2) # ctx_data.aad_length = aad_length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) xor %r11d, %r11d
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) mov %r11, InLen(%arg2) # ctx_data.in_length = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) mov %r11, PBlockLen(%arg2) # ctx_data.partial_block_length = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) mov %r11, PBlockEncKey(%arg2) # ctx_data.partial_block_enc_key = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) mov \Iv, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) movdqu (%rax), %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) movdqu %xmm0, OrigIV(%arg2) # ctx_data.orig_IV = iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
# The running counter is stored byte-reversed (SHUF_MASK), unlike OrigIV.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) movdqa SHUF_MASK(%rip), %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) pshufb %xmm2, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) movdqu %xmm0, CurCount(%arg2) # ctx_data.current_counter = iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) PRECOMPUTE \SUBKEY, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) movdqu HashKey(%arg2), %xmm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) CALC_AAD_HASH %xmm13, \AAD, \AADLEN, %xmm0, %xmm1, %xmm2, %xmm3, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) %xmm4, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) # GCM_ENC_DEC Encodes/Decodes given data. Assumes that the passed gcm_context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) # struct has been initialized by GCM_INIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) # Requires the input data be at least 1 byte long because of READ_PARTIAL_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) # Clobbers rax, r10-r13, and xmm0-xmm15
# Parameter: operation is the literal word enc or dec; it selects the
# assembly-time branches below and the GHASH_4_ENCRYPT_4_PARALLEL_ variant.
# Registers as used in this macro: %arg2 = context block, %arg3 = output
# buffer (stores), %arg4 = input buffer (loads), %arg5 = byte count,
# %r11 = running byte offset into both buffers, %xmm8 = running hash,
# %xmm0 = counter block.
# Helper macros invoked here (PARTIAL_BLOCK, INITIAL_BLOCKS_ENC_DEC,
# GHASH_4_ENCRYPT_4_PARALLEL_*, GHASH_LAST_4, ENCRYPT_SINGLE_BLOCK,
# READ_PARTIAL_BLOCK) are defined outside this excerpt.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) .macro GCM_ENC_DEC operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) movdqu AadHash(%arg2), %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) movdqu HashKey(%arg2), %xmm13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) add %arg5, InLen(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) xor %r11d, %r11d # initialise the data pointer offset as zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) PARTIAL_BLOCK %arg3 %arg4 %arg5 %r11 %xmm8 \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) sub %r11, %arg5 # sub partial block data used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) mov %arg5, %r13 # save the number of bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) and $-16, %r13 # %r13 = %r13 - (%r13 mod 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) mov %r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) # Encrypt/Decrypt first few blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293)
# r12 = 16 * (number of whole blocks mod 4); dispatch on 0, 1, 2 or 3 so
# that the remaining whole-block count is a multiple of four.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) and $(3<<4), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) jz _initial_num_blocks_is_0_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) cmp $(2<<4), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) jb _initial_num_blocks_is_1_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) je _initial_num_blocks_is_2_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) _initial_num_blocks_is_3_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 5, 678, \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) sub $48, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) jmp _initial_blocks_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) _initial_num_blocks_is_2_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 6, 78, \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) sub $32, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) jmp _initial_blocks_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) _initial_num_blocks_is_1_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 7, 8, \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) sub $16, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) jmp _initial_blocks_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) _initial_num_blocks_is_0_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) INITIAL_BLOCKS_ENC_DEC %xmm9, %xmm10, %xmm13, %xmm11, %xmm12, %xmm0, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) %xmm1, %xmm2, %xmm3, %xmm4, %xmm8, %xmm5, %xmm6, 8, 0, \operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) _initial_blocks_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) # Main loop - Encrypt/Decrypt remaining blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
# r13 = bytes left in whole blocks (a multiple of 64 at this point).
# Zero skips the loop entirely; exactly 64 goes straight to GHASH_LAST_4.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) test %r13, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) je _zero_cipher_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) sub $64, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) je _four_cipher_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) _crypt_by_4_\@:
# NOTE(review): the final argument below is the literal enc for both the
# enc and the dec expansion of this macro - confirm that the callee
# ignores it or that this is intended.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) GHASH_4_ENCRYPT_4_PARALLEL_\operation %xmm9, %xmm10, %xmm11, %xmm12, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) %xmm13, %xmm14, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) %xmm7, %xmm8, enc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) add $64, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) sub $64, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) jne _crypt_by_4_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) _four_cipher_left_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) GHASH_LAST_4 %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) %xmm15, %xmm1, %xmm2, %xmm3, %xmm4, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) _zero_cipher_left_\@:
# Persist the running hash and counter before handling any tail bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) movdqu %xmm8, AadHash(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) movdqu %xmm0, CurCount(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) mov %arg5, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) and $15, %r13 # %r13 = arg5 (mod 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) je _multiple_of_16_bytes_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) mov %r13, PBlockLen(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) # Handle the last <16 Byte block separately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) paddd ONE(%rip), %xmm0 # INCR CNT to get Yn
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) movdqu %xmm0, CurCount(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) pshufb %xmm10, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # Encrypt(K, Yn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) movdqu %xmm0, PBlockEncKey(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353)
# With fewer than 16 bytes in this call a full 16-byte load ending at the
# last input byte would start before the data, so READ_PARTIAL_BLOCK is
# used instead.
# NOTE(review): jge is a signed comparison on a byte count; jae would be the
# unsigned form. Confirm that lengths never exceed the signed range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) cmp $16, %arg5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) jge _large_enough_update_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) lea (%arg4,%r11,1), %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) mov %r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) READ_PARTIAL_BLOCK %r10 %r12 %xmm2 %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) jmp _data_read_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) _large_enough_update_\@:
# Temporarily move the offset back so that a 16-byte load ends exactly at
# the last input byte; the offset is restored right after the load.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) sub $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) add %r13, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) # receive the last <16 Byte block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) movdqu (%arg4, %r11, 1), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) sub %r13, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) add $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) lea SHIFT_MASK+16(%rip), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) # adjust the shuffle mask pointer to be able to shift 16-r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) # (r13 is the number of bytes in plaintext mod 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) sub %r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) # get the appropriate shuffle mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) movdqu (%r12), %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) # shift right 16-r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) pshufb %xmm2, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) _data_read_\@:
# r12 -> 16-byte mask whose low r13 bytes are 0xff and the rest zero (tail
# of ALL_F followed by the zero constant placed after it).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) lea ALL_F+16(%rip), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) sub %r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384)
# dec only: keep a copy of the input block in xmm2; that copy is what gets
# folded into the hash below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) movdqa %xmm1, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) pxor %xmm1, %xmm0 # XOR Encrypt(K, Yn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) movdqu (%r12), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) # get the appropriate mask to mask out top 16-r13 bytes of xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) pand %xmm1, %xmm0 # mask out top 16-r13 bytes of xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) pand %xmm1, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) pshufb %xmm10 ,%xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) pxor %xmm2, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) .else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) pshufb %xmm10,%xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) pxor %xmm0, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404)
# The partial block is only XORed into the hash here; GCM_COMPLETE performs
# the pending GHASH_MUL when PBlockLen is non-zero.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) movdqu %xmm8, AadHash(%arg2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) .ifc \operation, enc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) # GHASH computation for the last <16 byte block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) # shuffle xmm0 back to output as ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) pshufb %xmm10, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) # Output %r13 bytes
# More than 8 bytes: store the low qword at once, then continue with the
# high qword. The remaining 1..8 bytes are stored one at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) movq %xmm0, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) cmp $8, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) jle _less_than_8_bytes_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) mov %rax, (%arg3 , %r11, 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) add $8, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) psrldq $8, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) movq %xmm0, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) sub $8, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) _less_than_8_bytes_left_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) mov %al, (%arg3, %r11, 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) add $1, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) shr $8, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) sub $1, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) jne _less_than_8_bytes_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) _multiple_of_16_bytes_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) # GCM_COMPLETE: finishes the GHASH (pending partial block, then the length block) and writes the tag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) # Output: Authorization Tag (AUTH_TAG), AUTHTAGLEN bytes stored at AUTHTAG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) # Clobbers rax, r10-r12, and xmm0, xmm1, xmm5-xmm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) .macro GCM_COMPLETE AUTHTAG AUTHTAGLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) movdqu AadHash(%arg2), %xmm8 # %xmm8 = running hash value from the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) movdqu HashKey(%arg2), %xmm13 # %xmm13 = hash key from the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) mov PBlockLen(%arg2), %r12 # %r12 = PBlockLen (bytes pending in the partial block)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) test %r12, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) je _partial_done\@ # no partial block pending: skip the extra multiply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) _partial_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) mov AadLen(%arg2), %r12 # %r12 = aadLen (number of bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) shl $3, %r12 # convert into number of bits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) movd %r12d, %xmm15 # len(A) in %xmm15; NOTE(review): only the low 32 bits are copied - confirm len(A) in bits always fits
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) mov InLen(%arg2), %r12 # %r12 = InLen from the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) shl $3, %r12 # len(C) in bits (bytes * 8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) movq %r12, %xmm1 # len(C) in the low qword of %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) pslldq $8, %xmm15 # %xmm15 = len(A)||0x0000000000000000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) pxor %xmm1, %xmm15 # %xmm15 = len(A)||len(C)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) pxor %xmm15, %xmm8 # fold the length block into the hash
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) GHASH_MUL %xmm8, %xmm13, %xmm9, %xmm10, %xmm11, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) # final GHASH computation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) pshufb %xmm10, %xmm8 # byte-reflect the hash with SHUF_MASK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) movdqu OrigIV(%arg2), %xmm0 # %xmm0 = Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) ENCRYPT_SINGLE_BLOCK %xmm0, %xmm1 # E(K, Y0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) pxor %xmm8, %xmm0 # %xmm0 = hash XOR E(K, Y0), the tag stored below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) _return_T_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) mov \AUTHTAG, %r10 # %r10 = authTag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) mov \AUTHTAGLEN, %r11 # %r11 = auth_tag_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) cmp $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) je _T_16_\@ # 16-byte tag: one unaligned 16-byte store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) cmp $8, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) jl _T_4_\@ # fewer than 8 bytes: begin with the 4-byte store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) _T_8_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) movq %xmm0, %rax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) mov %rax, (%r10) # store the low 8 tag bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) add $8, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) sub $8, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) psrldq $8, %xmm0 # shift the remaining tag bytes down
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) test %r11, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) je _return_T_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) _T_4_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) movd %xmm0, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) mov %eax, (%r10) # store the next 4 tag bytes (unconditional once this label is reached)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) add $4, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) sub $4, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) psrldq $4, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) test %r11, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) je _return_T_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) _T_123_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) movd %xmm0, %eax # tail bytes (1-3 per the label name)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) cmp $2, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) jl _T_1_\@ # fewer than 2 left: single byte store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) mov %ax, (%r10) # store 2 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) cmp $2, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) je _return_T_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) add $2, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) sar $16, %eax # move the third byte into %al
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) _T_1_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) mov %al, (%r10) # store the final byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) jmp _return_T_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) _T_16_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) movdqu %xmm0, (%r10) # store the whole 16-byte tag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) _return_T_done_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) /* GHASH_MUL MACRO to implement: Data*HashKey mod (128,127,126,121,0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) * GH is updated in place, HK is only read, TMP1-TMP5 are scratch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) * Input: A and B (128-bits each, bit-reflected)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) * Output: C = A*B*x mod poly, (i.e. >>1 )
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) * To compute GH = GH*HashKey mod poly, give HK = HashKey<<1 mod poly as input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) * GH = GH * HK * x mod poly which is equivalent to GH*HashKey mod poly.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) .macro GHASH_MUL GH HK TMP1 TMP2 TMP3 TMP4 TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) movdqa \GH, \TMP1 # TMP1 = copy of GH for the high product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) pshufd $78, \GH, \TMP2 # TMP2 = GH with its qword halves swapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) pshufd $78, \HK, \TMP3 # TMP3 = HK with its qword halves swapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) pxor \GH, \TMP2 # TMP2 = a1+a0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) pxor \HK, \TMP3 # TMP3 = b1+b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) pclmulqdq $0x11, \HK, \TMP1 # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) pclmulqdq $0x00, \HK, \GH # GH = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) pclmulqdq $0x00, \TMP3, \TMP2 # TMP2 = (a0+a1)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) pxor \GH, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) pxor \TMP1, \TMP2 # TMP2 = (a0*b1)+(a1*b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) movdqa \TMP2, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) pslldq $8, \TMP3 # left shift TMP3 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) psrldq $8, \TMP2 # right shift TMP2 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) pxor \TMP3, \GH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) pxor \TMP2, \TMP1 # TMP1:GH holds the result of GH*HK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) # first phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) movdqa \GH, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) movdqa \GH, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) movdqa \GH, \TMP4 # copy GH into TMP2,TMP3 and TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) # in order to perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) # independent shifts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) pslld $31, \TMP2 # packed left shift <<31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) pslld $30, \TMP3 # packed left shift <<30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) pslld $25, \TMP4 # packed left shift <<25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) pxor \TMP3, \TMP2 # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) pxor \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) movdqa \TMP2, \TMP5 # TMP5 keeps a copy that is used in the second phase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) psrldq $4, \TMP5 # right shift TMP5 1 DW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) pslldq $12, \TMP2 # left shift TMP2 3 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) pxor \TMP2, \GH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) # second phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) movdqa \GH,\TMP2 # copy GH into TMP2,TMP3 and TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) # in order to perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) # independent shifts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) movdqa \GH,\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) movdqa \GH,\TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) psrld $1,\TMP2 # packed right shift >>1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) psrld $2,\TMP3 # packed right shift >>2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) psrld $7,\TMP4 # packed right shift >>7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) pxor \TMP3,\TMP2 # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) pxor \TMP4,\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) pxor \TMP5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) pxor \TMP2, \GH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) pxor \TMP1, \GH # result is in GH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) # Reads DLEN bytes starting at DPTR and stores in XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) # where 0 < DLEN < 16; the bytes of XMMDst above DLEN end up zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) # Clobbers %rax, DLEN and XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) .macro READ_PARTIAL_BLOCK DPTR DLEN XMM1 XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) cmp $8, \DLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) jl _read_lt8_\@ # fewer than 8 bytes: gather them one by one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) mov (\DPTR), %rax # load the first 8 bytes at once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) movq %rax, \XMMDst # low qword = first 8 bytes, upper qword cleared by movq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) sub $8, \DLEN # DLEN = bytes left beyond the first 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) jz _done_read_partial_block_\@ # exactly 8 bytes: done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) xor %eax, %eax # clear the byte accumulator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) _read_next_byte_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) shl $8, %rax # make room for the next lower-addressed byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) mov 7(\DPTR, \DLEN, 1), %al # read from the last byte backwards (offset 8 + DLEN - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) dec \DLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) jnz _read_next_byte_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) movq %rax, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) pslldq $8, \XMM1 # move the gathered bytes into the upper qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) por \XMM1, \XMMDst # combine with the first 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) jmp _done_read_partial_block_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) _read_lt8_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) xor %eax, %eax # clear the byte accumulator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) _read_next_byte_lt8_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) shl $8, %rax # make room for the next lower-addressed byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) mov -1(\DPTR, \DLEN, 1), %al # read from the last byte backwards (offset DLEN - 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) dec \DLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) jnz _read_next_byte_lt8_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) movq %rax, \XMMDst # low qword = gathered bytes, upper qword cleared by movq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) _done_read_partial_block_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) # CALC_AAD_HASH: Calculates the hash of the data which will not be encrypted and stores it in AadHash.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) # clobbers r10-11, xmm14, TMP1-TMP7, and rax when READ_PARTIAL_BLOCK runs for a trailing partial block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) .macro CALC_AAD_HASH HASHKEY AAD AADLEN TMP1 TMP2 TMP3 TMP4 TMP5 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) TMP6 TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) MOVADQ SHUF_MASK(%rip), %xmm14 # %xmm14 = byte-reflection shuffle mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) mov \AAD, %r10 # %r10 = AAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) mov \AADLEN, %r11 # %r11 = aadLen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) pxor \TMP7, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) pxor \TMP6, \TMP6 # TMP6 = running hash, starts at zero
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) cmp $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) jl _get_AAD_rest\@ # no full 16-byte block of AAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) _get_AAD_blocks\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) movdqu (%r10), \TMP7 # load the next 16 bytes of AAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) pshufb %xmm14, \TMP7 # byte-reflect the AAD data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) pxor \TMP7, \TMP6 # xor the block into the running hash
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) GHASH_MUL \TMP6, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) add $16, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) sub $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) cmp $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) jge _get_AAD_blocks\@ # loop while a full block remains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) movdqu \TMP6, \TMP7 # TMP7 = hash so far
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) /* read the last <16B of AAD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) _get_AAD_rest\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) test %r11, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) je _get_AAD_done\@ # no trailing bytes: TMP6 already holds the result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) READ_PARTIAL_BLOCK %r10, %r11, \TMP1, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) pshufb %xmm14, \TMP7 # byte-reflect the AAD data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) pxor \TMP6, \TMP7 # xor the running hash into the trailing block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) GHASH_MUL \TMP7, \HASHKEY, \TMP1, \TMP2, \TMP3, \TMP4, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) movdqu \TMP7, \TMP6 # TMP6 = final hash
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) _get_AAD_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) movdqu \TMP6, AadHash(%arg2) # store the hash in the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) # PARTIAL_BLOCK: Handles encryption/decryption and the tag partial blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) # between update calls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) # Requires the input data be at least 1 byte long due to READ_PARTIAL_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) # Outputs encrypted bytes, advances DATA_OFFSET, and updates hash and partial info in gcm_data_context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) # Clobbers rax, r10, r12, r13, xmm0-6, xmm9-13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) .macro PARTIAL_BLOCK CYPH_PLAIN_OUT PLAIN_CYPH_IN PLAIN_CYPH_LEN DATA_OFFSET \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) AAD_HASH operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) mov PBlockLen(%arg2), %r13 # r13 = PBlockLen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) test %r13, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) je _partial_block_done_\@ # Leave Macro if no partial blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) # Read in input data without over reading
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) cmp $16, \PLAIN_CYPH_LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) jl _fewer_than_16_bytes_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) movups (\PLAIN_CYPH_IN), %xmm1 # If at least 16 bytes, just fill xmm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) jmp _data_read_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) _fewer_than_16_bytes_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) lea (\PLAIN_CYPH_IN, \DATA_OFFSET, 1), %r10 # r10 = address of the input data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) mov \PLAIN_CYPH_LEN, %r12 # work on a copy, READ_PARTIAL_BLOCK clobbers its length register
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) READ_PARTIAL_BLOCK %r10 %r12 %xmm0 %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) mov PBlockLen(%arg2), %r13 # reload r13 = PBlockLen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) _data_read_\@: # Finished reading in data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) movdqu PBlockEncKey(%arg2), %xmm9 # xmm9 = E(K, Yn) kept in the context for this block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) movdqu HashKey(%arg2), %xmm13 # xmm13 = hash key from the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) lea SHIFT_MASK(%rip), %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) # adjust the shuffle mask pointer to be able to shift r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) # (16-r13 is the number of bytes in plaintext mod 16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) add %r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) movdqu (%r12), %xmm2 # get the appropriate shuffle mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) pshufb %xmm2, %xmm9 # shift right r13 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) movdqa %xmm1, %xmm3 # xmm3 = copy of the ciphertext, hashed below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) pxor %xmm1, %xmm9 # Cyphertext XOR E(K, Yn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) mov \PLAIN_CYPH_LEN, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) add %r13, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) sub $16, %r10 # r10 = len + r13 - 16, negative if the block stays incomplete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) # Determine if partial block is not being filled and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) # shift mask accordingly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) jge _no_extra_mask_1_\@ # uses the flags of the sub above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) sub %r10, %r12 # r10 is negative here, so r12 moves forward by the shortfall
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683) _no_extra_mask_1_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 686) # get the appropriate mask to mask out bottom r13 bytes of xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 687) pand %xmm1, %xmm9 # mask out bottom r13 bytes of xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 688)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 689) pand %xmm1, %xmm3 # apply the same mask to the ciphertext copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 690) movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 691) pshufb %xmm10, %xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 692) pshufb %xmm2, %xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 693) pxor %xmm3, \AAD_HASH # xor the ciphertext bytes into the hash
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 694)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 695) test %r10, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 696) jl _partial_incomplete_1_\@ # block still not full: no multiply yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 697)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 698) # GHASH computation for the last <16 Byte block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 699) GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 700) xor %eax, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 701)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 702) mov %rax, PBlockLen(%arg2) # block completed: PBlockLen = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 703) jmp _dec_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 704) _partial_incomplete_1_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 705) add \PLAIN_CYPH_LEN, PBlockLen(%arg2) # PBlockLen grows by the bytes consumed now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 706) _dec_done_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 707) movdqu \AAD_HASH, AadHash(%arg2) # save the hash back to the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 708) .else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 709) pxor %xmm1, %xmm9 # Plaintext XOR E(K, Yn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 711) mov \PLAIN_CYPH_LEN, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 712) add %r13, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 713) # Set r10 to be the amount of data left in CYPH_PLAIN_IN after filling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 714) sub $16, %r10 # r10 = len + r13 - 16, negative if the block stays incomplete
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 715) # Determine if partial block is not being filled and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 716) # shift mask accordingly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 717) jge _no_extra_mask_2_\@ # uses the flags of the sub above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 718) sub %r10, %r12 # r10 is negative here, so r12 moves forward by the shortfall
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 719) _no_extra_mask_2_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 720)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 721) movdqu ALL_F-SHIFT_MASK(%r12), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 722) # get the appropriate mask to mask out bottom r13 bytes of xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 723) pand %xmm1, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 724)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 725) movdqa SHUF_MASK(%rip), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 726) pshufb %xmm1, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 727) pshufb %xmm2, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 728) pxor %xmm9, \AAD_HASH # xor the ciphertext bytes into the hash
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 729)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 730) test %r10, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 731) jl _partial_incomplete_2_\@ # block still not full: no multiply yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 732)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 733) # GHASH computation for the last <16 Byte block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 734) GHASH_MUL \AAD_HASH, %xmm13, %xmm0, %xmm10, %xmm11, %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 735) xor %eax, %eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 736)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 737) mov %rax, PBlockLen(%arg2) # block completed: PBlockLen = 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 738) jmp _encode_done_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 739) _partial_incomplete_2_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 740) add \PLAIN_CYPH_LEN, PBlockLen(%arg2) # PBlockLen grows by the bytes consumed now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 741) _encode_done_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 742) movdqu \AAD_HASH, AadHash(%arg2) # save the hash back to the context
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 743)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 744) movdqa SHUF_MASK(%rip), %xmm10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 745) # shuffle xmm9 back to output as ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 746) pshufb %xmm10, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 747) pshufb %xmm2, %xmm9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 748) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 749) # output encrypted Bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 750) test %r10, %r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 751) jl _partial_fill_\@ # block not completed: write only the new bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 752) mov %r13, %r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 753) mov $16, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 754) # Set r13 to be the number of bytes to write out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755) sub %r12, %r13 # r13 = 16 - PBlockLen
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) jmp _count_set_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) _partial_fill_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) mov \PLAIN_CYPH_LEN, %r13 # r13 = all input bytes of this call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) _count_set_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) movdqa %xmm9, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) movq %xmm0, %rax # rax = low 8 output bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) cmp $8, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) jle _less_than_8_bytes_left_\@ # 8 or fewer bytes: byte loop only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) mov %rax, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) # store 8 bytes at once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) add $8, \DATA_OFFSET
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) psrldq $8, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) movq %xmm0, %rax # rax = upper 8 output bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) sub $8, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) _less_than_8_bytes_left_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) movb %al, (\CYPH_PLAIN_OUT, \DATA_OFFSET, 1) # store one byte per iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) add $1, \DATA_OFFSET
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) shr $8, %rax # bring the next byte into al
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) sub $1, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) jne _less_than_8_bytes_left_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) _partial_block_done_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) .endm # PARTIAL_BLOCK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) * if a = number of total plaintext bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) * b = floor(a/16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782) * num_initial_blocks = b mod 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) * encrypt the initial num_initial_blocks blocks and apply ghash on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784) * the ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) * %r10, %r11, %r12, %rax, %xmm5, %xmm6, %xmm7, %xmm8, %xmm9 registers
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) * are clobbered
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) * %arg1, %arg2, %arg3 are used as a pointer only, not modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791) .macro INITIAL_BLOCKS_ENC_DEC TMP1 TMP2 TMP3 TMP4 TMP5 XMM0 XMM1 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793) MOVADQ SHUF_MASK(%rip), %xmm14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) movdqu AadHash(%arg2), %xmm\i # XMM0 = Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) # start AES for num_initial_blocks blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) movdqu CurCount(%arg2), \XMM0 # XMM0 = Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) .if (\i == 5) || (\i == 6) || (\i == 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) MOVADQ ONE(%RIP),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) MOVADQ 0(%arg1),\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) .irpc index, \i_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) paddd \TMP1, \XMM0 # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) movdqa \XMM0, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) .else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) MOVADQ \XMM0, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) pshufb %xmm14, %xmm\index # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) pxor \TMP2, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) lea 0x10(%arg1),%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) mov keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) shr $2,%eax # 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) add $5,%eax # 128->9, 192->11, 256->13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820) aes_loop_initial_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) MOVADQ (%r10),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) .irpc index, \i_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) aesenc \TMP1, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) add $16,%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826) sub $1,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) jnz aes_loop_initial_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) MOVADQ (%r10), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) .irpc index, \i_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) aesenclast \TMP1, %xmm\index # Last Round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) .irpc index, \i_seq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) movdqu (%arg4 , %r11, 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) pxor \TMP1, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) movdqu %xmm\index, (%arg3 , %r11, 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837) # write back plaintext/ciphertext for num_initial_blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) add $16, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) movdqa \TMP1, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) pshufb %xmm14, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) # prepare plaintext/ciphertext for GHASH computation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) # apply GHASH on num_initial_blocks blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) .if \i == 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) pxor %xmm5, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) GHASH_MUL %xmm6, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) pxor %xmm6, %xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) pxor %xmm7, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) .elseif \i == 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) pxor %xmm6, %xmm7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) GHASH_MUL %xmm7, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) pxor %xmm7, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) .elseif \i == 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) pxor %xmm7, %xmm8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) GHASH_MUL %xmm8, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) cmp $64, %r13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) jl _initial_blocks_done\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) # no need for precomputed values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) * Precomputations for HashKey parallel with encryption of first 4 blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) * HashKey_i_k holds XORed values of the low and high parts of the HashKey_i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) MOVADQ ONE(%RIP),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) paddd \TMP1, \XMM0 # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) MOVADQ \XMM0, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) pshufb %xmm14, \XMM1 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) paddd \TMP1, \XMM0 # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) MOVADQ \XMM0, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) pshufb %xmm14, \XMM2 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) paddd \TMP1, \XMM0 # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) MOVADQ \XMM0, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) pshufb %xmm14, \XMM3 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) paddd \TMP1, \XMM0 # INCR Y0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) MOVADQ \XMM0, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890) pshufb %xmm14, \XMM4 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) MOVADQ 0(%arg1),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) pxor \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) pxor \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) pxor \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) pxor \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) .irpc index, 1234 # do 4 rounds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) movaps 0x10*\index(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) aesenc \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) aesenc \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) aesenc \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) aesenc \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) .irpc index, 56789 # do next 5 rounds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) movaps 0x10*\index(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) aesenc \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) aesenc \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) aesenc \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909) aesenc \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) lea 0xa0(%arg1),%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) mov keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913) shr $2,%eax # 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) sub $4,%eax # 128->0, 192->2, 256->4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) jz aes_loop_pre_done\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) aes_loop_pre_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918) MOVADQ (%r10),\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) .irpc index, 1234
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) aesenc \TMP2, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) add $16,%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) sub $1,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924) jnz aes_loop_pre_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) aes_loop_pre_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) MOVADQ (%r10), \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) aesenclast \TMP2, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) aesenclast \TMP2, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) aesenclast \TMP2, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) aesenclast \TMP2, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) movdqu 16*0(%arg4 , %r11 , 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) pxor \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) movdqu \XMM1, 16*0(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) movdqa \TMP1, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938) movdqu 16*1(%arg4 , %r11 , 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) pxor \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) movdqu \XMM2, 16*1(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) movdqa \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944) movdqu 16*2(%arg4 , %r11 , 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) pxor \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947) movdqu \XMM3, 16*2(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) movdqa \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950) movdqu 16*3(%arg4 , %r11 , 1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) pxor \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) .ifc \operation, dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953) movdqu \XMM4, 16*3(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) movdqa \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) .else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) movdqu \XMM1, 16*0(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) movdqu \XMM2, 16*1(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) movdqu \XMM3, 16*2(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) movdqu \XMM4, 16*3(%arg3 , %r11 , 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) .endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) add $64, %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) pshufb %xmm14, \XMM1 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) pxor \XMMDst, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) # combine GHASHed value with the corresponding ciphertext
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) pshufb %xmm14, \XMM2 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) pshufb %xmm14, \XMM3 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) pshufb %xmm14, \XMM4 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) _initial_blocks_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * encrypt 4 blocks at a time: 4 counter values derived from XMM0 are AES-encrypted and XORed with the input at %arg4 + %r11, output goes to %arg3 + %r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) * ghash the 4 previously encrypted ciphertext blocks (XMM1-XMM4 on entry); on exit XMM1-XMM4 hold the new byte-swapped output blocks, with the reduced hash (XMM5) XORed into XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) * %arg1 (round keys), %arg2 (HashKey values), %arg3, %arg4 are used as pointers only, not modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) * %r11 is the data offset value (not modified here); %r10, %eax, %xmm15 and the flags are clobbered; the operation argument is unused
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) .macro GHASH_4_ENCRYPT_4_PARALLEL_enc TMP1 TMP2 TMP3 TMP4 TMP5 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) movdqa \XMM1, \XMM5 # XMM5-XMM8 = blocks to hash; frees XMM1-XMM4 for the new counter blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) movdqa \XMM2, \XMM6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) movdqa \XMM3, \XMM7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) movdqa \XMM4, \XMM8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988) movdqa SHUF_MASK(%rip), %xmm15 # byte-swap mask used by every pshufb below
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) # multiply XMM5 * HashKey_4 using karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) movdqa \XMM5, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) pshufd $78, \XMM5, \TMP6 # TMP6 = XMM5 with its 64-bit halves swapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) pxor \XMM5, \TMP6 # TMP6 = a1+a0 in both halves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) paddd ONE(%rip), \XMM0 # INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) movdqu HashKey_4(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) movdqa \XMM0, \XMM1 # XMM1-XMM4 receive the next 4 counter values
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) paddd ONE(%rip), \XMM0 # INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) movdqa \XMM0, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) paddd ONE(%rip), \XMM0 # INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) movdqa \XMM0, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) paddd ONE(%rip), \XMM0 # INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) movdqa \XMM0, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) pshufb %xmm15, \XMM1 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) pshufb %xmm15, \XMM2 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) pshufb %xmm15, \XMM3 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) pshufb %xmm15, \XMM4 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) pxor (%arg1), \XMM1 # Round 0: XOR with the first round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) pxor (%arg1), \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) pxor (%arg1), \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) pxor (%arg1), \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) movdqu HashKey_4_k(%arg2), \TMP5 # the _k value is expected to hold b1+b0 of HashKey_4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) movaps 0x10(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) aesenc \TMP1, \XMM1 # Round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) aesenc \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) aesenc \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) aesenc \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) movaps 0x20(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) aesenc \TMP1, \XMM1 # Round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) aesenc \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) aesenc \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) aesenc \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) movdqa \XMM6, \TMP1 # second block: XMM6 * HashKey_3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) pshufd $78, \XMM6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) pxor \XMM6, \TMP2 # TMP2 = a1+a0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) movdqu HashKey_3(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) movaps 0x30(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) aesenc \TMP3, \XMM1 # Round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) movaps 0x40(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) aesenc \TMP3, \XMM1 # Round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) movdqu HashKey_3_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) movaps 0x50(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) aesenc \TMP3, \XMM1 # Round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) pxor \TMP1, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) pxor \XMM6, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) pxor \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) movdqa \XMM7, \TMP1 # third block: XMM7 * HashKey_2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) pshufd $78, \XMM7, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) pxor \XMM7, \TMP2 # TMP2 = a1+a0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) movdqu HashKey_2(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) # Multiply XMM7 * HashKey_2 using karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) movaps 0x60(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) aesenc \TMP3, \XMM1 # Round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) movaps 0x70(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) aesenc \TMP3, \XMM1 # Round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) movdqu HashKey_2_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) movaps 0x80(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) aesenc \TMP3, \XMM1 # Round 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) pxor \TMP1, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) pxor \XMM7, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) pxor \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) # Multiply XMM8 * HashKey
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) # XMM8 and TMP5 hold the values for the two operands
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) movdqa \XMM8, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) pshufd $78, \XMM8, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) pxor \XMM8, \TMP2 # TMP2 = a1+a0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) movdqu HashKey(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) movaps 0x90(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) aesenc \TMP3, \XMM1 # Round 9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) lea 0xa0(%arg1),%r10 # r10 = address of round key 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) mov keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) shr $2,%eax # 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) sub $4,%eax # 128->0, 192->2, 256->4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) jz aes_loop_par_enc_done\@ # 128: no extra rounds, go straight to the last round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) aes_loop_par_enc\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) MOVADQ (%r10),\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) .irpc index, 1234 # NOTE(review): uses literal %xmm1-%xmm4, not the XMM1-XMM4 arguments; callers presumably pass exactly those registers - confirm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) aesenc \TMP3, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) add $16,%r10 # advance to the next round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) sub $1,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) jnz aes_loop_par_enc\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) aes_loop_par_enc_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) MOVADQ (%r10), \TMP3 # TMP3 = last round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) aesenclast \TMP3, \XMM1 # Last round (Round 10 for 128, later for 192/256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) aesenclast \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) aesenclast \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) aesenclast \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) movdqu HashKey_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) movdqu (%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) movdqu 16(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) movdqu 32(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) movdqu 48(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) movdqu \XMM1, (%arg3,%r11,1) # Write to the ciphertext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) movdqu \XMM2, 16(%arg3,%r11,1) # Write to the ciphertext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) movdqu \XMM3, 32(%arg3,%r11,1) # Write to the ciphertext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) movdqu \XMM4, 48(%arg3,%r11,1) # Write to the ciphertext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) pshufb %xmm15, \XMM1 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) pshufb %xmm15, \XMM2 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) pshufb %xmm15, \XMM3 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) pshufb %xmm15, \XMM4 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) pxor \TMP4, \TMP1 # TMP1 = sum of the four a1*b1 products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) pxor \XMM8, \XMM5 # XMM5 = sum of the four a0*b0 products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) pxor \TMP6, \TMP2 # TMP2 = sum of the four (a1+a0)*(b1+b0) products
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) pxor \TMP1, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) pxor \XMM5, \TMP2 # TMP2 = karatsuba middle term
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) movdqa \TMP2, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) pslldq $8, \TMP3 # left shift TMP3 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) psrldq $8, \TMP2 # right shift TMP2 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) pxor \TMP3, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) # first phase of reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) movdqa \XMM5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) movdqa \XMM5, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) movdqa \XMM5, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) pslld $31, \TMP2 # packed left shift << 31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) pslld $30, \TMP3 # packed left shift << 30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) pslld $25, \TMP4 # packed left shift << 25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) pxor \TMP3, \TMP2 # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) pxor \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) movdqa \TMP2, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) psrldq $4, \TMP5 # right shift T5 1 DW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) pslldq $12, \TMP2 # left shift T2 3 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) pxor \TMP2, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) # second phase of reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) movdqa \XMM5,\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) movdqa \XMM5,\TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) psrld $1, \TMP2 # packed right shift >> 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) psrld $2, \TMP3 # packed right shift >> 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) psrld $7, \TMP4 # packed right shift >> 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) pxor \TMP3,\TMP2 # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) pxor \TMP4,\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) pxor \TMP5, \TMP2 # add the bits saved from the first phase
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) pxor \TMP2, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) pxor \TMP1, \XMM5 # result is in XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) pxor \XMM5, \XMM1 # XOR the reduced hash into the first new (byte-swapped) block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) * decrypt 4 blocks at a time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) * ghash the 4 previously decrypted ciphertext blocks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) * %arg1, %arg3, %arg4 are used as pointers only, not modified
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) * %r11 is the data offset value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) .macro GHASH_4_ENCRYPT_4_PARALLEL_dec TMP1 TMP2 TMP3 TMP4 TMP5 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) TMP6 XMM0 XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 operation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) movdqa \XMM1, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) movdqa \XMM2, \XMM6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) movdqa \XMM3, \XMM7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) movdqa \XMM4, \XMM8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) movdqa SHUF_MASK(%rip), %xmm15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) # multiply XMM5 * HashKey_4 using karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) movdqa \XMM5, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) pshufd $78, \XMM5, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) pxor \XMM5, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) paddd ONE(%rip), \XMM0 # INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) movdqu HashKey_4(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) pclmulqdq $0x11, \TMP5, \TMP4 # TMP4 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) movdqa \XMM0, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) paddd ONE(%rip), \XMM0 # INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) movdqa \XMM0, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) paddd ONE(%rip), \XMM0 # INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) movdqa \XMM0, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) paddd ONE(%rip), \XMM0 # INCR CNT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) movdqa \XMM0, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) pshufb %xmm15, \XMM1 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) pclmulqdq $0x00, \TMP5, \XMM5 # XMM5 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) pshufb %xmm15, \XMM2 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) pshufb %xmm15, \XMM3 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) pshufb %xmm15, \XMM4 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) pxor (%arg1), \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) pxor (%arg1), \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) pxor (%arg1), \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) pxor (%arg1), \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) movdqu HashKey_4_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) pclmulqdq $0x00, \TMP5, \TMP6 # TMP6 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) movaps 0x10(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) aesenc \TMP1, \XMM1 # Round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) aesenc \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) aesenc \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) aesenc \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) movaps 0x20(%arg1), \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) aesenc \TMP1, \XMM1 # Round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) aesenc \TMP1, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) aesenc \TMP1, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) aesenc \TMP1, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) movdqa \XMM6, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) pshufd $78, \XMM6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) pxor \XMM6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) movdqu HashKey_3(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1 * b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) movaps 0x30(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) aesenc \TMP3, \XMM1 # Round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) pclmulqdq $0x00, \TMP5, \XMM6 # XMM6 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) movaps 0x40(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) aesenc \TMP3, \XMM1 # Round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) movdqu HashKey_3_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) movaps 0x50(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) aesenc \TMP3, \XMM1 # Round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) pxor \TMP1, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) pxor \XMM6, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) pxor \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) movdqa \XMM7, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) pshufd $78, \XMM7, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) pxor \XMM7, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) movdqu HashKey_2(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) # Multiply XMM7 * HashKey_2 using karatsuba
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) movaps 0x60(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) aesenc \TMP3, \XMM1 # Round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) pclmulqdq $0x00, \TMP5, \XMM7 # XMM7 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) movaps 0x70(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) aesenc \TMP3, \XMM1 # Round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) movdqu HashKey_2_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) movaps 0x80(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) aesenc \TMP3, \XMM1 # Round 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) pxor \TMP1, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) # accumulate the results in TMP4:XMM5, TMP6 holds the middle part
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) pxor \XMM7, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) pxor \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) # Multiply XMM8 * HashKey
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) # XMM8 and TMP5 hold the values for the two operands
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) movdqa \XMM8, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) pshufd $78, \XMM8, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) pxor \XMM8, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) movdqu HashKey(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) movaps 0x90(%arg1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) aesenc \TMP3, \XMM1 # Round 9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) aesenc \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) aesenc \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) aesenc \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) pclmulqdq $0x00, \TMP5, \XMM8 # XMM8 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) lea 0xa0(%arg1),%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) mov keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) shr $2,%eax # 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) sub $4,%eax # 128->0, 192->2, 256->4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) jz aes_loop_par_dec_done\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) aes_loop_par_dec\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) MOVADQ (%r10),\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) .irpc index, 1234
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) aesenc \TMP3, %xmm\index
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) .endr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) add $16,%r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) sub $1,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) jnz aes_loop_par_dec\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) aes_loop_par_dec_done\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) MOVADQ (%r10), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) aesenclast \TMP3, \XMM1 # last round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) aesenclast \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) aesenclast \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) aesenclast \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) movdqu HashKey_k(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) pclmulqdq $0x00, \TMP5, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) movdqu (%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) pxor \TMP3, \XMM1 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) movdqu \XMM1, (%arg3,%r11,1) # Write to plaintext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) movdqa \TMP3, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) movdqu 16(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) pxor \TMP3, \XMM2 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) movdqu \XMM2, 16(%arg3,%r11,1) # Write to plaintext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) movdqa \TMP3, \XMM2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) movdqu 32(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) pxor \TMP3, \XMM3 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) movdqu \XMM3, 32(%arg3,%r11,1) # Write to plaintext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) movdqa \TMP3, \XMM3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) movdqu 48(%arg4,%r11,1), \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) pxor \TMP3, \XMM4 # Ciphertext/Plaintext XOR EK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) movdqu \XMM4, 48(%arg3,%r11,1) # Write to plaintext buffer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) movdqa \TMP3, \XMM4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) pshufb %xmm15, \XMM1 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) pshufb %xmm15, \XMM2 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) pshufb %xmm15, \XMM3 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) pshufb %xmm15, \XMM4 # perform a 16 byte swap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) pxor \TMP4, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) pxor \XMM8, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) pxor \TMP6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) pxor \TMP1, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) pxor \XMM5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) movdqa \TMP2, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) pslldq $8, \TMP3 # left shift TMP3 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) psrldq $8, \TMP2 # right shift TMP2 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) pxor \TMP3, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) pxor \TMP2, \TMP1 # accumulate the results in TMP1:XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) # first phase of reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) movdqa \XMM5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) movdqa \XMM5, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) movdqa \XMM5, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) # move XMM5 into TMP2, TMP3, TMP4 in order to perform shifts independently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) pslld $31, \TMP2 # packed right shift << 31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) pslld $30, \TMP3 # packed right shift << 30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) pslld $25, \TMP4 # packed right shift << 25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) pxor \TMP3, \TMP2 # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) pxor \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) movdqa \TMP2, \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) psrldq $4, \TMP5 # right shift T5 1 DW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) pslldq $12, \TMP2 # left shift T2 3 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) pxor \TMP2, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) # second phase of reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) movdqa \XMM5,\TMP2 # make 3 copies of XMM5 into TMP2, TMP3, TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) movdqa \XMM5,\TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) movdqa \XMM5,\TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) psrld $1, \TMP2 # packed left shift >>1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) psrld $2, \TMP3 # packed left shift >>2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) psrld $7, \TMP4 # packed left shift >>7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) pxor \TMP3,\TMP2 # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) pxor \TMP4,\TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) pxor \TMP5, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) pxor \TMP2, \XMM5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) pxor \TMP1, \XMM5 # result is in TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) pxor \XMM5, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) /* GHASH the last 4 ciphertext blocks.
 *
 * Multiplies XMM1, XMM2, XMM3 and XMM4 (carry-less, Karatsuba style) by the
 * values loaded from HashKey_4, HashKey_3, HashKey_2 and HashKey, all
 * addressed relative to %arg2, XORs the four products together and then
 * reduces the accumulated sum in two shift/xor phases.
 *
 * Operands:
 *   TMP1..TMP7  scratch registers, overwritten
 *   XMM1..XMM4  the four input blocks, overwritten by intermediate products
 *   XMMDst      receives the reduced result
 *
 * NOTE(review): the HashKey_N_k slots are presumably the precomputed
 * (high qword xor low qword) Karatsuba operands of the matching HashKey_N;
 * they are set up outside this macro - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) .macro GHASH_LAST_4 TMP1 TMP2 TMP3 TMP4 TMP5 TMP6 \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) TMP7 XMM1 XMM2 XMM3 XMM4 XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) # Multiply XMM1 * HashKey_4 (using Karatsuba)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) movdqa \XMM1, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) pshufd $78, \XMM1, \TMP2 # TMP2 = XMM1 with its 64-bit halves swapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) pxor \XMM1, \TMP2 # TMP2 = a1+a0 in both qwords
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) movdqu HashKey_4(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) pclmulqdq $0x11, \TMP5, \TMP6 # TMP6 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) pclmulqdq $0x00, \TMP5, \XMM1 # XMM1 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) movdqu HashKey_4_k(%arg2), \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) movdqa \XMM1, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) movdqa \TMP2, \XMM1 # high, low, middle products now in TMP6, XMMDst, XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) # Multiply XMM2 * HashKey_3 (using Karatsuba)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) movdqa \XMM2, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) pshufd $78, \XMM2, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) pxor \XMM2, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) movdqu HashKey_3(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) pclmulqdq $0x00, \TMP5, \XMM2 # XMM2 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) movdqu HashKey_3_k(%arg2), \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) pxor \TMP1, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) pxor \XMM2, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) pxor \TMP2, \XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) # results accumulated in TMP6, XMMDst, XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) # Multiply XMM3 * HashKey_2 (using Karatsuba)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) movdqa \XMM3, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) pshufd $78, \XMM3, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) pxor \XMM3, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) movdqu HashKey_2(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) pclmulqdq $0x00, \TMP5, \XMM3 # XMM3 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) movdqu HashKey_2_k(%arg2), \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) pxor \TMP1, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) pxor \XMM3, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) pxor \TMP2, \XMM1 # results accumulated in TMP6, XMMDst, XMM1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) # Multiply XMM4 * HashKey (using Karatsuba)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) movdqa \XMM4, \TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) pshufd $78, \XMM4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) pxor \XMM4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) movdqu HashKey(%arg2), \TMP5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) pclmulqdq $0x11, \TMP5, \TMP1 # TMP1 = a1*b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) pclmulqdq $0x00, \TMP5, \XMM4 # XMM4 = a0*b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) movdqu HashKey_k(%arg2), \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) pclmulqdq $0x00, \TMP4, \TMP2 # TMP2 = (a1+a0)*(b1+b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) pxor \TMP1, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) pxor \XMM4, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) pxor \XMM1, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) pxor \TMP6, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) pxor \XMMDst, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) # middle section of the temp results combined as in karatsuba algorithm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) movdqa \TMP2, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) pslldq $8, \TMP4 # left shift TMP4 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) psrldq $8, \TMP2 # right shift TMP2 2 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) pxor \TMP4, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) pxor \TMP2, \TMP6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) # TMP6:XMMDst holds the result of the accumulated carry-less multiplications
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) # first phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) movdqa \XMMDst, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) movdqa \XMMDst, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) movdqa \XMMDst, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) # move XMMDst into TMP2, TMP3, TMP4 in order to perform 3 shifts independently
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) pslld $31, \TMP2 # packed left shift << 31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) pslld $30, \TMP3 # packed left shift << 30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) pslld $25, \TMP4 # packed left shift << 25
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) pxor \TMP3, \TMP2 # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) pxor \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) movdqa \TMP2, \TMP7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) psrldq $4, \TMP7 # right shift TMP7 1 DW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) pslldq $12, \TMP2 # left shift TMP2 3 DWs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) pxor \TMP2, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) # second phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) movdqa \XMMDst, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) # make 3 copies of XMMDst for doing 3 shift operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) movdqa \XMMDst, \TMP3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) movdqa \XMMDst, \TMP4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) psrld $1, \TMP2 # packed right shift >> 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) psrld $2, \TMP3 # packed right shift >> 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) psrld $7, \TMP4 # packed right shift >> 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) pxor \TMP3, \TMP2 # xor the shifted versions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) pxor \TMP4, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) pxor \TMP7, \TMP2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) pxor \TMP2, \XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) pxor \TMP6, \XMMDst # reduced result is in XMMDst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) /* Encryption of a single block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) * uses eax & r10
 *
 * Operands:
 *   XMM0  the block to encrypt on entry, the encrypted block on exit
 *   TMP1  scratch register that receives each round key in turn
 *
 * %arg1 must point at the expanded key schedule: round key 0 is read from
 * (%arg1) and the remaining round keys from 16(%arg1) onwards, 16 bytes
 * apart. The macro runs 9, 11 or 13 aesenc rounds, selected from keysize,
 * followed by one aesenclast.
 *
 * NOTE(review): keysize and MOVADQ are defined outside this macro; the
 * 128/192/256 annotations below suggest keysize yields the key length in
 * bytes (16/24/32) - confirm.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) .macro ENCRYPT_SINGLE_BLOCK XMM0 TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) pxor (%arg1), \XMM0 # initial whitening with round key 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) mov keysize,%eax
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) shr $2,%eax # 128->4, 192->6, 256->8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) add $5,%eax # 128->9, 192->11, 256->13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) lea 16(%arg1), %r10 # get first expanded key address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) _esb_loop_\@:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) MOVADQ (%r10),\TMP1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) aesenc \TMP1,\XMM0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) add $16,%r10 # advance to the next round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) sub $1,%eax # eax = aesenc rounds still to run
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) jnz _esb_loop_\@
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) MOVADQ (%r10),\TMP1 # r10 now points at the final round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) aesenclast \TMP1,\XMM0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) .endm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) * void aesni_gcm_dec(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) * struct gcm_context_data *data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) * // Context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) * u8 *out, // Plaintext output. Decrypt in-place is allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) * const u8 *in, // Ciphertext input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) * u64 plaintext_len, // Length of data in bytes for decryption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) * u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) * // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) * // concatenated with 0x00000001. 16-byte aligned pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) * u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) * const u8 *aad, // Additional Authentication Data (AAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) * u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) * u8 *auth_tag, // Authenticated Tag output. The driver will compare this to the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) * // given authentication tag and only return the plaintext if they match.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) * // (most likely), 12 or 8.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) * Assumptions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) * keys:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) * keys are pre-expanded and aligned to 16 bytes. we are using the first
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * set of 11 keys in the data structure void *aes_ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) * iv:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) * 0 1 2 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) * | Salt (From the SA) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) * | Initialization Vector |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) * | (This is the sequence number from IPSec header) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) * | 0x1 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) * AAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) * AAD padded to 128 bits with 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) * for example, assume AAD is a u32 vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) * if AAD is 8 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) * AAD[2] = {A0, A1};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) * padded AAD in xmm register = {A1 A0 0 0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) * 0 1 2 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) * | SPI (A1) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) * | 32-bit Sequence Number (A0) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) * | 0x0 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) * AAD Format with 32-bit Sequence Number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) * if AAD is 12 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) * AAD[3] = {A0, A1, A2};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) * padded AAD in xmm register = {A2 A1 A0 0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) * 0 1 2 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) * | SPI (A2) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) * | 64-bit Extended Sequence Number {A1,A0} |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) * | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) * | 0x0 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) * AAD Format with 64-bit Extended Sequence Number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591) * poly = x^128 + x^127 + x^126 + x^121 + 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) *****************************************************************************/
/*
 * One-shot GCM decryption entry point: runs the init, decrypt and
 * tag-completion macros back to back. All five macros used here are
 * defined outside this function.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) SYM_FUNC_START(aesni_gcm_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596)
/* arg6 = iv, arg7 = hash_subkey, arg8 = aad, arg9 = aad_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) GCM_INIT %arg6, arg7, arg8, arg9
/* NOTE(review): presumably consumes arg1..arg5 (ctx, data, out, in, len) - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) GCM_ENC_DEC dec
/* arg10 = auth_tag, arg11 = auth_tag_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) GCM_COMPLETE arg10, arg11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) SYM_FUNC_END(aesni_gcm_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) * void aesni_gcm_enc(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) * struct gcm_context_data *data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) * // Context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) * u8 *out, // Ciphertext output. Encrypt in-place is allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) * const u8 *in, // Plaintext input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) * u64 plaintext_len, // Length of data in bytes for encryption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) * u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) * // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) * // concatenated with 0x00000001. 16-byte aligned pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) * u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) * const u8 *aad, // Additional Authentication Data (AAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) * u64 aad_len, // Length of AAD in bytes. With RFC4106 this is going to be 8 or 12 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) * u8 *auth_tag, // Authenticated Tag output.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) * // 12 or 8.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) * Assumptions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) * keys:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) * keys are pre-expanded and aligned to 16 bytes. we are using the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) * first set of 11 keys in the data structure void *aes_ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) * iv:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * 0 1 2 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) * | Salt (From the SA) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) * | Initialization Vector |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) * | (This is the sequence number from IPSec header) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) * | 0x1 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) * AAD:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) * AAD padded to 128 bits with 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) * for example, assume AAD is a u32 vector
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) * if AAD is 8 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) * AAD[2] = {A0, A1};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) * padded AAD in xmm register = {A1 A0 0 0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) * 0 1 2 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) * | SPI (A1) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) * | 32-bit Sequence Number (A0) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) * | 0x0 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) * AAD Format with 32-bit Sequence Number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) * if AAD is 12 bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) * AAD[3] = {A0, A1, A2};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) * padded AAD in xmm register = {A2 A1 A0 0}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) * 0 1 2 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) * | SPI (A2) |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) * | 64-bit Extended Sequence Number {A1,A0} |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673) * | |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) * | 0x0 |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) * AAD Format with 64-bit Extended Sequence Number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) * poly = x^128 + x^127 + x^126 + x^121 + 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) ***************************************************************************/
/*
 * One-shot GCM encryption entry point: runs the init, encrypt and
 * tag-completion macros back to back. All five macros used here are
 * defined outside this function.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) SYM_FUNC_START(aesni_gcm_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684)
/* arg6 = iv, arg7 = hash_subkey, arg8 = aad, arg9 = aad_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) GCM_INIT %arg6, arg7, arg8, arg9
/* NOTE(review): presumably consumes arg1..arg5 (ctx, data, out, in, len) - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) GCM_ENC_DEC enc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687)
/* arg10 = auth_tag, arg11 = auth_tag_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) GCM_COMPLETE arg10, arg11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) SYM_FUNC_END(aesni_gcm_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) * void aesni_gcm_init(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) * struct gcm_context_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) * // context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) * u8 *iv, // Pre-counter block j0: 4 byte salt (from Security Association)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) * // concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) * // concatenated with 0x00000001. 16-byte aligned pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) * u8 *hash_subkey, // H, the Hash sub key input. Data starts on a 16-byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) * const u8 *aad, // Additional Authentication Data (AAD)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) * u64 aad_len) // Length of AAD in bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) */
/*
 * Init-only entry point for the split init/update/finalize interface:
 * expands GCM_INIT (defined outside this function) and returns.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) SYM_FUNC_START(aesni_gcm_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) FUNC_SAVE
/* arg3 = iv, arg4 = hash_subkey, arg5 = aad, arg6 = aad_len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) GCM_INIT %arg3, %arg4,%arg5, %arg6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) SYM_FUNC_END(aesni_gcm_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) * void aesni_gcm_enc_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) * struct gcm_context_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) * // context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) * u8 *out, // Ciphertext output. Encrypt in-place is allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) * const u8 *in, // Plaintext input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) * u64 plaintext_len, // Length of data in bytes for encryption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) */
/*
 * Encrypt-only step of the split interface: expands GCM_ENC_DEC in its
 * enc variant. No init or tag completion is performed here.
 * NOTE(review): GCM_ENC_DEC is defined outside this function and takes no
 * explicit operands here; presumably it reads arg1..arg5 - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) SYM_FUNC_START(aesni_gcm_enc_update)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) GCM_ENC_DEC enc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) SYM_FUNC_END(aesni_gcm_enc_update)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) * void aesni_gcm_dec_update(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) * struct gcm_context_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) * // context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) * u8 *out, // Plaintext output. Decrypt in-place is allowed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) * const u8 *in, // Ciphertext input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) * u64 plaintext_len, // Length of data in bytes for decryption.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) */
/*
 * Decrypt-only step of the split interface: expands GCM_ENC_DEC in its
 * dec variant. No init or tag completion is performed here.
 * NOTE(review): GCM_ENC_DEC is defined outside this function and takes no
 * explicit operands here; presumably it reads arg1..arg5 - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) SYM_FUNC_START(aesni_gcm_dec_update)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) GCM_ENC_DEC dec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) SYM_FUNC_END(aesni_gcm_dec_update)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) /*****************************************************************************
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * void aesni_gcm_finalize(void *aes_ctx, // AES Key schedule. Starts on a 16 byte boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) * struct gcm_context_data *data,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) * // context data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) * u8 *auth_tag, // Authenticated Tag output.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) * u64 auth_tag_len); // Authenticated Tag Length in bytes. Valid values are 16 (most likely),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) * // 12 or 8.
 *
 * Thin entry point: the GCM_COMPLETE macro is handed the third and fourth
 * arguments (auth_tag and auth_tag_len), bracketed by FUNC_SAVE /
 * FUNC_RESTORE. The macros are defined outside this part of the file.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) SYM_FUNC_START(aesni_gcm_finalize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) FUNC_SAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) GCM_COMPLETE %arg3 %arg4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) FUNC_RESTORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) SYM_FUNC_END(aesni_gcm_finalize)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758)
/*
 * _key_expansion_128 / _key_expansion_256a: internal ABI
 * (two names for the same code; called only from aesni_set_key)
 * input:
 * %xmm0: previous round key
 * %xmm1: aeskeygenassist result for this step (dword 3 is used)
 * %xmm4: scratch; aesni_set_key zeroes it before the first call
 * TKEYP: address where the new round key is stored (movaps, 16-byte aligned)
 * output:
 * %xmm0: new round key, also written to (TKEYP)
 * TKEYP: advanced by 0x10
 * changed:
 * %xmm1
 * %xmm4
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) SYM_FUNC_START_LOCAL(_key_expansion_256a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) pshufd $0b11111111, %xmm1, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) shufps $0b00010000, %xmm0, %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) pxor %xmm4, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) shufps $0b10001100, %xmm0, %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) pxor %xmm4, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) pxor %xmm1, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) movaps %xmm0, (TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) add $0x10, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) SYM_FUNC_END(_key_expansion_256a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) SYM_FUNC_END_ALIAS(_key_expansion_128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772)
/*
 * _key_expansion_192a: internal ABI (called only from aesni_set_key)
 * input:
 * %xmm0: previous four key words
 * %xmm2: previous two key words in its low 8 bytes
 * %xmm1: aeskeygenassist result for this step (dword 1 is used)
 * %xmm4: scratch; aesni_set_key zeroes it before the first call
 * TKEYP: address where the output is stored (movaps, 16-byte aligned)
 * output:
 * %xmm0: updated key words
 * %xmm2: updated key words
 * 0x20 bytes at (TKEYP): the old low half of %xmm2, then the new %xmm0,
 * then the new low half of %xmm2
 * TKEYP: advanced by 0x20
 * changed:
 * %xmm1
 * %xmm3
 * %xmm4
 * %xmm5
 * %xmm6
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) SYM_FUNC_START_LOCAL(_key_expansion_192a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) pshufd $0b01010101, %xmm1, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) shufps $0b00010000, %xmm0, %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) pxor %xmm4, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) shufps $0b10001100, %xmm0, %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) pxor %xmm4, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) pxor %xmm1, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) movaps %xmm2, %xmm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) movaps %xmm2, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) pslldq $4, %xmm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) pshufd $0b11111111, %xmm0, %xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) pxor %xmm3, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) pxor %xmm5, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) movaps %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) shufps $0b01000100, %xmm0, %xmm6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) movaps %xmm6, (TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) shufps $0b01001110, %xmm2, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) movaps %xmm1, 0x10(TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) add $0x20, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) SYM_FUNC_END(_key_expansion_192a)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796)
/*
 * _key_expansion_192b: internal ABI (called only from aesni_set_key)
 * input:
 * %xmm0: previous four key words
 * %xmm2: previous two key words in its low 8 bytes
 * %xmm1: aeskeygenassist result for this step (dword 1 is used)
 * %xmm4: scratch; aesni_set_key zeroes it before the first call
 * TKEYP: address where the output is stored (movaps, 16-byte aligned)
 * output:
 * %xmm0: updated key words, also written to (TKEYP)
 * %xmm2: updated key words (not stored by this helper)
 * TKEYP: advanced by 0x10
 * changed:
 * %xmm1
 * %xmm3
 * %xmm4
 * %xmm5
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) SYM_FUNC_START_LOCAL(_key_expansion_192b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) pshufd $0b01010101, %xmm1, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) shufps $0b00010000, %xmm0, %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) pxor %xmm4, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) shufps $0b10001100, %xmm0, %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) pxor %xmm4, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) pxor %xmm1, %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) movaps %xmm2, %xmm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) pslldq $4, %xmm5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) pshufd $0b11111111, %xmm0, %xmm3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) pxor %xmm3, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) pxor %xmm5, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) movaps %xmm0, (TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) add $0x10, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) SYM_FUNC_END(_key_expansion_192b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815)
/*
 * _key_expansion_256b: internal ABI (called only from aesni_set_key)
 * Same shuffle/xor sequence as _key_expansion_256a, but it works on %xmm2
 * instead of %xmm0 and takes dword 2 of the aeskeygenassist result.
 * input:
 * %xmm2: round key to be updated
 * %xmm1: aeskeygenassist result for this step (dword 2 is used)
 * %xmm4: scratch; aesni_set_key zeroes it before the first call
 * TKEYP: address where the new round key is stored (movaps, 16-byte aligned)
 * output:
 * %xmm2: new round key, also written to (TKEYP)
 * TKEYP: advanced by 0x10
 * changed:
 * %xmm1
 * %xmm4
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) SYM_FUNC_START_LOCAL(_key_expansion_256b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) pshufd $0b10101010, %xmm1, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) shufps $0b00010000, %xmm2, %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) pxor %xmm4, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) shufps $0b10001100, %xmm2, %xmm4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) pxor %xmm4, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) pxor %xmm1, %xmm2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) movaps %xmm2, (TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) add $0x10, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) SYM_FUNC_END(_key_expansion_256b)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) * int aesni_set_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) * unsigned int key_len)
 *
 * Expands in_key into ctx:
 * - offset 0: encryption round keys, starting with the user key itself;
 * - offset 240: decryption round keys, i.e. the encryption round keys in
 * reverse order with aesimc applied to all but the first and the last;
 * - offset 480: key_len, as read back later by aesni_enc / aesni_dec.
 *
 * Only the low byte of key_len is examined and it is not validated: a value
 * below 24 takes the 128-bit path, exactly 24 the 192-bit path and anything
 * else the 256-bit path.
 *
 * in_key may be unaligned (movups / movq loads); ctx is written with movaps
 * and therefore has to be 16-byte aligned.
 *
 * AREG is cleared before returning - presumably the return value 0; AREG is
 * defined outside this part of the file. On 32-bit builds the arguments are
 * fetched from the stack and KEYP is saved and restored around the body.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) SYM_FUNC_START(aesni_set_key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) movl (FRAME_OFFSET+8)(%esp), KEYP # ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) movl (FRAME_OFFSET+12)(%esp), UKEYP # in_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) movl (FRAME_OFFSET+16)(%esp), %edx # key_len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) movups (UKEYP), %xmm0 # user key (first 16 bytes)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) movaps %xmm0, (KEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) lea 0x10(KEYP), TKEYP # key addr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) movl %edx, 480(KEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) pxor %xmm4, %xmm4 # xmm4 is assumed 0 in _key_expansion_x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) cmp $24, %dl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) jb .Lenc_key128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) je .Lenc_key192
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) movups 0x10(UKEYP), %xmm2 # other user key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) movaps %xmm2, (TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) add $0x10, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) aeskeygenassist $0x1, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854) call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) aeskeygenassist $0x2, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) aeskeygenassist $0x4, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) aeskeygenassist $0x8, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) aeskeygenassist $0x10, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) aeskeygenassist $0x20, %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) call _key_expansion_256b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) call _key_expansion_256a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) jmp .Ldec_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) .Lenc_key192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) movq 0x10(UKEYP), %xmm2 # other user key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) aeskeygenassist $0x1, %xmm2, %xmm1 # round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) call _key_expansion_192a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) aeskeygenassist $0x2, %xmm2, %xmm1 # round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) call _key_expansion_192b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) aeskeygenassist $0x4, %xmm2, %xmm1 # round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) call _key_expansion_192a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) aeskeygenassist $0x8, %xmm2, %xmm1 # round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) call _key_expansion_192b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) aeskeygenassist $0x10, %xmm2, %xmm1 # round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) call _key_expansion_192a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) aeskeygenassist $0x20, %xmm2, %xmm1 # round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) call _key_expansion_192b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) aeskeygenassist $0x40, %xmm2, %xmm1 # round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) call _key_expansion_192a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) aeskeygenassist $0x80, %xmm2, %xmm1 # round 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) call _key_expansion_192b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) jmp .Ldec_key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) .Lenc_key128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) aeskeygenassist $0x1, %xmm0, %xmm1 # round 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) aeskeygenassist $0x2, %xmm0, %xmm1 # round 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) aeskeygenassist $0x4, %xmm0, %xmm1 # round 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) aeskeygenassist $0x8, %xmm0, %xmm1 # round 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) aeskeygenassist $0x10, %xmm0, %xmm1 # round 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) aeskeygenassist $0x20, %xmm0, %xmm1 # round 6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) aeskeygenassist $0x40, %xmm0, %xmm1 # round 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) aeskeygenassist $0x80, %xmm0, %xmm1 # round 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) aeskeygenassist $0x1b, %xmm0, %xmm1 # round 9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) call _key_expansion_128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) aeskeygenassist $0x36, %xmm0, %xmm1 # round 10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) call _key_expansion_128
/*
 * Build the decryption schedule at offset 240. TKEYP is stepped back to the
 * last encryption round key; that key and the first one swap ends unchanged,
 * then the loop walks KEYP upwards and UKEYP downwards, storing the aesimc
 * of every round key in between.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) .Ldec_key:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) sub $0x10, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) movaps (KEYP), %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) movaps (TKEYP), %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) movaps %xmm0, 240(TKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) movaps %xmm1, 240(KEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) add $0x10, KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) lea 240-16(TKEYP), UKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) .Ldec_key_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) movaps (KEYP), %xmm0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) aesimc %xmm0, %xmm1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) movaps %xmm1, (UKEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) add $0x10, KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) sub $0x10, UKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) cmp TKEYP, KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) jb .Ldec_key_loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) xor AREG, AREG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) SYM_FUNC_END(aesni_set_key)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) * void aesni_enc(const void *ctx, u8 *dst, const u8 *src)
 *
 * Encrypts the single 16-byte block at src into dst. The key length is read
 * from offset 480 of ctx and the rounds are run by _aesni_enc1. src and dst
 * are accessed with movups, so they need not be aligned. On 32-bit builds
 * the arguments are fetched from the stack and KEYP / KLEN are saved and
 * restored around the body.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) SYM_FUNC_START(aesni_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) movl (FRAME_OFFSET+12)(%esp), KEYP # ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) movl (FRAME_OFFSET+16)(%esp), OUTP # dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) movl (FRAME_OFFSET+20)(%esp), INP # src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) movl 480(KEYP), KLEN # key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) movups (INP), STATE # input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) call _aesni_enc1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) movups STATE, (OUTP) # output
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) popl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) SYM_FUNC_END(aesni_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) * _aesni_enc1: internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * KEYP: key struct pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) * KLEN: key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) * STATE: initial state (input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) * STATE: final state (output)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) * KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) * TKEYP (T1)
 *
 * After the round-0 xor, KLEN selects the entry point into one straight-line
 * round sequence: below 24 enters at .Lenc128 (10 rounds), exactly 24 at
 * .Lenc192 (12 rounds), anything else falls through from the top (14 rounds).
 * TKEYP is biased per key size so that the shared tail can use the same
 * displacements; the last round key is always at 0x70(TKEYP).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) SYM_FUNC_START_LOCAL(_aesni_enc1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) movaps (KEYP), KEY # key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) mov KEYP, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) pxor KEY, STATE # round 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) add $0x30, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) cmp $24, KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) jb .Lenc128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) lea 0x20(TKEYP), TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) je .Lenc192
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) add $0x20, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) movaps -0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) movaps -0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) .Lenc192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) movaps -0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) movaps -0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) .Lenc128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) movaps -0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) movaps -0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) movaps (TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) movaps 0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) movaps 0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) movaps 0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) movaps 0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) movaps 0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) movaps 0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) aesenc KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) movaps 0x70(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) aesenclast KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) SYM_FUNC_END(_aesni_enc1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) * _aesni_enc4: internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) * KEYP: key struct pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) * KLEN: key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) * STATE1: initial state (input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) * STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) * STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) * STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) * STATE1: final state (output)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) * STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) * STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) * STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) * KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) * TKEYP (T1)
 *
 * Four-block variant of _aesni_enc1: every round key is loaded once and
 * applied to STATE1..STATE4 back to back. KLEN selects the entry point the
 * same way: below 24 enters at .L4enc128 (10 rounds), exactly 24 at
 * .L4enc192 (12 rounds), anything else falls through from the top
 * (14 rounds).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) SYM_FUNC_START_LOCAL(_aesni_enc4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) movaps (KEYP), KEY # key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) mov KEYP, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) pxor KEY, STATE1 # round 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) pxor KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) pxor KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) pxor KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) add $0x30, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) cmp $24, KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) jb .L4enc128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) lea 0x20(TKEYP), TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) je .L4enc192
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) add $0x20, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) movaps -0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) movaps -0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) #.align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) .L4enc192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) movaps -0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) movaps -0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) #.align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) .L4enc128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) movaps -0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) movaps -0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) movaps (TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) movaps 0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) movaps 0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) movaps 0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) movaps 0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) movaps 0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) movaps 0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) aesenc KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) aesenc KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) aesenc KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) aesenc KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) movaps 0x70(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) aesenclast KEY, STATE1 # last round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) aesenclast KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) aesenclast KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) aesenclast KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) SYM_FUNC_END(_aesni_enc4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) * void aesni_dec (const void *ctx, u8 *dst, const u8 *src)
 *
 * Decrypts the single 16-byte block at src into dst. The key length is read
 * from offset 480 of ctx, then KEYP is advanced by 240 so that _aesni_dec1
 * walks the decryption round keys that aesni_set_key stores at that offset.
 * src and dst are accessed with movups, so they need not be aligned. On
 * 32-bit builds the arguments are fetched from the stack and KEYP / KLEN are
 * saved and restored around the body.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) SYM_FUNC_START(aesni_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) movl (FRAME_OFFSET+12)(%esp), KEYP # ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) movl (FRAME_OFFSET+16)(%esp), OUTP # dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) movl (FRAME_OFFSET+20)(%esp), INP # src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) mov 480(KEYP), KLEN # key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) add $240, KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) movups (INP), STATE # input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) call _aesni_dec1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) movups STATE, (OUTP) #output
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) popl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) SYM_FUNC_END(aesni_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) * _aesni_dec1: internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) * KEYP: pointer to the decryption round keys (aesni_dec passes ctx + 240)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) * KLEN: key length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) * STATE: initial state (input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) * STATE: final state (output)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) * KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) * TKEYP (T1)
 *
 * After the round-0 xor, KLEN selects the entry point into one straight-line
 * round sequence: below 24 enters at .Ldec128 (10 rounds), exactly 24 at
 * .Ldec192 (12 rounds), anything else falls through from the top (14 rounds).
 * TKEYP is biased per key size so that the shared tail can use the same
 * displacements; the last round key is always at 0x70(TKEYP).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) SYM_FUNC_START_LOCAL(_aesni_dec1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) movaps (KEYP), KEY # key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) mov KEYP, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) pxor KEY, STATE # round 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) add $0x30, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) cmp $24, KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) jb .Ldec128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) lea 0x20(TKEYP), TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) je .Ldec192
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) add $0x20, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) movaps -0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) movaps -0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) .Ldec192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) movaps -0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) movaps -0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) .Ldec128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) movaps -0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) movaps -0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) movaps (TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) movaps 0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) movaps 0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) movaps 0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) movaps 0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) movaps 0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) movaps 0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) aesdec KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) movaps 0x70(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) aesdeclast KEY, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) SYM_FUNC_END(_aesni_dec1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) * _aesni_dec4: internal ABI; decrypts four blocks at once, loading each round key once and applying it to STATE1..STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) * KEYP: key struct pointer (round keys are read from offset 0 upward)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) * KLEN: key length (< 24 takes the 128 path, == 24 the 192 path, anything larger the longest path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) * STATE1: initial state (input)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) * STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) * STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) * STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) * STATE1: final state (output)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) * STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) * STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227) * STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) * KEY (holds the round key currently being applied)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) * TKEYP (T1) (biased copy of KEYP used to address the round keys)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) SYM_FUNC_START_LOCAL(_aesni_dec4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) movaps (KEYP), KEY # round-0 key: first 16 bytes at KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) mov KEYP, TKEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) pxor KEY, STATE1 # round 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) pxor KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) pxor KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) pxor KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) add $0x30, TKEYP # bias so the shared tail at .L4dec128 can use offsets -0x20..0x70
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) cmp $24, KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) jb .L4dec128 # KLEN < 24: 9 aesdec rounds + aesdeclast
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) lea 0x20(TKEYP), TKEYP # lea, not add: the flags from cmp are still needed by je
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) je .L4dec192 # KLEN == 24: 11 aesdec rounds + aesdeclast
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) add $0x20, TKEYP # KLEN > 24: 13 aesdec rounds + aesdeclast
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) movaps -0x60(TKEYP), KEY # = 0x10(KEYP), the key after round 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) movaps -0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) .L4dec192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) movaps -0x40(TKEYP), KEY # on the 192 path this is 0x10(KEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) movaps -0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) .L4dec128:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) movaps -0x20(TKEYP), KEY # on the 128 path this is 0x10(KEYP); tail shared by all key sizes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) movaps -0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) movaps (TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) movaps 0x10(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) movaps 0x20(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) movaps 0x30(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) movaps 0x40(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) movaps 0x50(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) movaps 0x60(TKEYP), KEY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) aesdec KEY, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) aesdec KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) aesdec KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) aesdec KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) movaps 0x70(TKEYP), KEY # final round key
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) aesdeclast KEY, STATE1 # last round
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) aesdeclast KEY, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) aesdeclast KEY, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) aesdeclast KEY, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) SYM_FUNC_END(_aesni_dec4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) /* ECB-encrypt every whole 16-byte block of src into dst; a trailing partial block is not touched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) * void aesni_ecb_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) * size_t len)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) SYM_FUNC_START(aesni_ecb_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) pushl LEN # 32-bit: save the registers that are about to receive arguments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) movl (FRAME_OFFSET+16)(%esp), KEYP # ctx (16 = three pushes above + return address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) movl (FRAME_OFFSET+20)(%esp), OUTP # dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) movl (FRAME_OFFSET+24)(%esp), INP # src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) movl (FRAME_OFFSET+28)(%esp), LEN # len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) test LEN, LEN # check length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) jz .Lecb_enc_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) mov 480(KEYP), KLEN # key length; offset 480 presumably matches struct crypto_aes_ctx (defined elsewhere)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) jb .Lecb_enc_ret # less than one block: nothing to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) jb .Lecb_enc_loop1 # fewer than four blocks: skip the 4-wide loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) .Lecb_enc_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) movups (INP), STATE1 # four blocks per iteration; LEN >= 64 here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) movups 0x10(INP), STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) movups 0x20(INP), STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) movups 0x30(INP), STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) call _aesni_enc4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) movups STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) movups STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) movups STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) jae .Lecb_enc_loop4 # unsigned: len is a size_t, same family as the jb checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) jb .Lecb_enc_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) .Lecb_enc_loop1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) movups (INP), STATE1 # one block per iteration; LEN >= 16 here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) call _aesni_enc1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) jae .Lecb_enc_loop1 # unsigned compare on the remaining length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) .Lecb_enc_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) popl KLEN # restore in reverse push order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) popl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) SYM_FUNC_END(aesni_ecb_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) /* ECB-decrypt every whole 16-byte block of src into dst; a trailing partial block is not touched.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) * void aesni_ecb_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) * size_t len);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) SYM_FUNC_START(aesni_ecb_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) pushl LEN # 32-bit: save the registers that are about to receive arguments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) movl (FRAME_OFFSET+16)(%esp), KEYP # ctx (16 = three pushes above + return address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) movl (FRAME_OFFSET+20)(%esp), OUTP # dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) movl (FRAME_OFFSET+24)(%esp), INP # src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) movl (FRAME_OFFSET+28)(%esp), LEN # len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) test LEN, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) jz .Lecb_dec_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) mov 480(KEYP), KLEN # must precede the add below: 480 is relative to the unadjusted ctx pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) add $240, KEYP # presumably the decryption round keys inside the context; confirm against struct crypto_aes_ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) jb .Lecb_dec_ret # less than one block: nothing to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) jb .Lecb_dec_loop1 # fewer than four blocks: skip the 4-wide loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) .Lecb_dec_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) movups (INP), STATE1 # four blocks per iteration; LEN >= 64 here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) movups 0x10(INP), STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) movups 0x20(INP), STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) movups 0x30(INP), STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) call _aesni_dec4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) movups STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) movups STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) movups STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) jae .Lecb_dec_loop4 # unsigned: len is a size_t, same family as the jb checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) jb .Lecb_dec_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) .Lecb_dec_loop1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) movups (INP), STATE1 # one block per iteration; LEN >= 16 here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) call _aesni_dec1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) jae .Lecb_dec_loop1 # unsigned compare on the remaining length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) .Lecb_dec_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) popl KLEN # restore in reverse push order (also undoes the add to KEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) popl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) SYM_FUNC_END(aesni_ecb_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) /* CBC-encrypt every whole 16-byte block of src into dst, chaining from *iv and writing the last output block back to *iv.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) * void aesni_cbc_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) * size_t len, u8 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) SYM_FUNC_START(aesni_cbc_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) pushl IVP # 32-bit: save the registers that are about to receive arguments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) pushl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) movl (FRAME_OFFSET+20)(%esp), KEYP # ctx (20 = four pushes above + return address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) movl (FRAME_OFFSET+24)(%esp), OUTP # dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) movl (FRAME_OFFSET+28)(%esp), INP # src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) movl (FRAME_OFFSET+32)(%esp), LEN # len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) movl (FRAME_OFFSET+36)(%esp), IVP # iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) jb .Lcbc_enc_ret # less than one block: return without touching *iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) mov 480(KEYP), KLEN # key length; offset 480 presumably matches struct crypto_aes_ctx (defined elsewhere)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) movups (IVP), STATE # load iv as initial state
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) .Lcbc_enc_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) movups (INP), IN # load input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) pxor IN, STATE # chain: previous output block (or iv) xor current input block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) call _aesni_enc1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) movups STATE, (OUTP) # store output
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) jae .Lcbc_enc_loop # unsigned: len is a size_t, same family as the jb check at entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) movups STATE, (IVP) # hand the last output block back as the next iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) .Lcbc_enc_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) popl KLEN # restore in reverse push order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) popl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) popl IVP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) SYM_FUNC_END(aesni_cbc_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) /* CBC-decrypt every whole 16-byte block of src into dst, chaining from *iv and writing the last input block back to *iv.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) * void aesni_cbc_dec(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) * size_t len, u8 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) SYM_FUNC_START(aesni_cbc_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494) pushl IVP # 32-bit: save the registers that are about to receive arguments
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) pushl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) pushl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) pushl KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) movl (FRAME_OFFSET+20)(%esp), KEYP # ctx (20 = four pushes above + return address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) movl (FRAME_OFFSET+24)(%esp), OUTP # dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) movl (FRAME_OFFSET+28)(%esp), INP # src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) movl (FRAME_OFFSET+32)(%esp), LEN # len
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) movl (FRAME_OFFSET+36)(%esp), IVP # iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) jb .Lcbc_dec_just_ret # less than one block: return without touching *iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506) mov 480(KEYP), KLEN # must precede the add below: 480 is relative to the unadjusted ctx pointer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) add $240, KEYP # presumably the decryption round keys inside the context; confirm against struct crypto_aes_ctx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) movups (IVP), IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509) cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) jb .Lcbc_dec_loop1 # fewer than four blocks: skip the 4-wide loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) .Lcbc_dec_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) movups (INP), IN1 # keep a copy of each input block: it is the chaining value for the next block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) movaps IN1, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515) movups 0x10(INP), IN2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) movaps IN2, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) movups 0x20(INP), IN3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519) movaps IN3, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) movups 0x30(INP), IN4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) movaps IN4, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) movups 0x20(INP), IN1 # 32-bit path reuses IN1/IN2 for blocks 3 and 4; blocks 1 and 2 are reloaded after the call
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) movaps IN1, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) movups 0x30(INP), IN2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) movaps IN2, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) call _aesni_dec4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) pxor IV, STATE1 # block 1 chains from the incoming iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) pxor IN1, STATE2 # block k chains from input block k-1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) pxor IN2, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) pxor IN3, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534) movaps IN4, IV # last input block becomes the iv for the next iteration
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) pxor IN1, STATE4 # IN1 currently holds input block 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537) movaps IN2, IV # IN2 currently holds input block 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) movups (INP), IN1 # reload input block 1; done before the stores below, so it still sees the input if OUTP aliases INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) pxor IN1, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) movups 0x10(INP), IN2 # reload input block 2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) pxor IN2, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543) movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) movups STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) movups STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) movups STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547) sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) jae .Lcbc_dec_loop4 # unsigned: len is a size_t, same family as the jb checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) jb .Lcbc_dec_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) .Lcbc_dec_loop1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) movups (INP), IN # one block per iteration; LEN >= 16 here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) movaps IN, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) call _aesni_dec1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559) pxor IV, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) movups STATE, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) movaps IN, IV # this input block is the chaining value for the next one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562) sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) jae .Lcbc_dec_loop1 # unsigned compare on the remaining length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) .Lcbc_dec_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568) movups IV, (IVP) # hand the last input block back as the next iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) .Lcbc_dec_just_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) #ifndef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) popl KLEN # restore in reverse push order (also undoes the add to KEYP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572) popl KEYP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) popl LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) popl IVP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) SYM_FUNC_END(aesni_cbc_dec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) #ifdef __x86_64__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) .pushsection .rodata
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) .align 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) .Lbswap_mask: # 16-byte aligned so it can be loaded with movaps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 # pshufb control: result byte i = source byte 15-i (full byte reversal)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) .popsection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) * _aesni_inc_init: internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) * setup registers used by _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) * IV (left unchanged)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) * CTR: == IV, in little endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594) * TCTR_LOW: == lower qword of CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) * INC: == 1, in little endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) * BSWAP_MASK == endian swapping mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) SYM_FUNC_START_LOCAL(_aesni_inc_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) movaps .Lbswap_mask(%rip), BSWAP_MASK # RIP-relative: this code is x86_64-only, so no absolute address is needed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) movaps IV, CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) pshufb BSWAP_MASK, CTR # byte-reverse the copy of IV into CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) mov $1, TCTR_LOW # TCTR_LOW is only a scratch here, used to build INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) movq TCTR_LOW, INC # INC = 1 in the low qword, upper qword cleared by movq
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) movq CTR, TCTR_LOW # real value: low qword of the byte-reversed counter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) SYM_FUNC_END(_aesni_inc_init)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) * _aesni_inc: internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610) * Increase IV by 1, IV is in big endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) * IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) * CTR: == IV, in little endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) * TCTR_LOW: == lower qword of CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) * INC: == 1, in little endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) * BSWAP_MASK == endian swapping mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) * IV: Increase by 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) * CTR: == output IV, in little endian
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) * TCTR_LOW: == lower qword of CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) SYM_FUNC_START_LOCAL(_aesni_inc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) paddq INC, CTR # add 1 to the low qword; paddq does not carry between qwords
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625) add $1, TCTR_LOW # same increment on the scalar shadow copy, which does set the carry flag
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) jnc .Linc_low # no wrap of the low qword: done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) pslldq $8, INC # low qword wrapped: move the 1 into the high qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628) paddq INC, CTR # propagate the carry into the high qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) psrldq $8, INC # restore INC to 1 in the low qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) .Linc_low:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) movaps CTR, IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) pshufb BSWAP_MASK, IV # byte-reverse the incremented counter back into IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) SYM_FUNC_END(_aesni_inc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) /* CTR mode: xor every whole 16-byte block of src with the encrypted counter into dst; the advanced counter is written back to *iv.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637) * void aesni_ctr_enc(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) * size_t len, u8 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) SYM_FUNC_START(aesni_ctr_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) jb .Lctr_enc_just_ret # less than one block: return without touching *iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) mov 480(KEYP), KLEN # key length; offset 480 presumably matches struct crypto_aes_ctx (defined elsewhere)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) movups (IVP), IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) call _aesni_inc_init
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647) cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) jb .Lctr_enc_loop1 # fewer than four blocks: skip the 4-wide loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) .Lctr_enc_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) movaps IV, STATE1 # each STATEn takes the current counter, then the counter is bumped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) movups (INP), IN1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) movaps IV, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) movups 0x10(INP), IN2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) movaps IV, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) movups 0x20(INP), IN3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) movaps IV, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662) movups 0x30(INP), IN4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) call _aesni_enc4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) pxor IN1, STATE1 # output = encrypted counter xor input
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) movups STATE1, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) pxor IN2, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) movups STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) pxor IN3, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) movups STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) pxor IN4, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) movups STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675) cmp $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) jae .Lctr_enc_loop4 # unsigned: len is a size_t, same family as the jb checks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) jb .Lctr_enc_ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) .Lctr_enc_loop1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) movaps IV, STATE # one block per iteration; LEN >= 16 here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) call _aesni_inc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) movups (INP), IN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684) call _aesni_enc1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) pxor IN, STATE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) movups STATE, (OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) sub $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) add $16, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689) add $16, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) cmp $16, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) jae .Lctr_enc_loop1 # unsigned compare on the remaining length
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692) .Lctr_enc_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) movups IV, (IVP) # store the counter for the next block back to *iv
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) .Lctr_enc_just_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) SYM_FUNC_END(aesni_ctr_enc)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) * _aesni_gf128mul_x_ble: internal ABI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) * Multiply in GF(2^128) for XTS IVs: paddq doubles each 64-bit half of IV (a one-bit left shift per half) and the top bit lost from each half is patched back in through CTR
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702) * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) * IV: current IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) * GF128MUL_MASK == mask with 0x87 and 0x01 (NOTE(review): presumably 0x87 in dword 0 and 0x01 in dword 2, zero elsewhere, because pshufd $0x13 copies IV dword 3 into CTR dword 0 and IV dword 1 into CTR dword 2 - confirm against .Lgf128mul_x_ble_mask)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706) * IV: next IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) * changed:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) * CTR: == temporary value (the shuffled copy of the old IV; psrad $31 turns each of its dwords into all-ones or zero from that dword's top bit, then it is ANDed with GF128MUL_MASK and XORed into the doubled IV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) #define _aesni_gf128mul_x_ble() \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) pshufd $0x13, IV, CTR; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) paddq IV, IV; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) psrad $31, CTR; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) pand GF128MUL_MASK, CTR; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) pxor CTR, IV;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) * void aesni_xts_encrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) * const u8 *src, unsigned int len, le128 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) SYM_FUNC_START(aesni_xts_encrypt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723) /* Set-up: load the mask used by _aesni_gf128mul_x_ble() and the starting tweak from (IVP). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) movups (IVP), IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) /* NOTE(review): offset 480 of the context is presumably crypto_aes_ctx.key_length - confirm against the struct layout. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) mov 480(KEYP), KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) /* Each pass handles four 16-byte blocks (64 bytes). The first pass runs before LEN is examined, so a full 64-byte group is always read and written; NOTE(review): looks like len must be a non-zero multiple of 64 - confirm against callers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) .Lxts_enc_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) movdqa IV, STATE1 /* block 0: start from the current tweak */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) movdqu 0x00(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) pxor INC, STATE1 /* STATE1 = input block 0 ^ tweak */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) movdqu IV, 0x00(OUTP) /* park the tweak in the output slot; it is reloaded after the cipher call */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) /* block 1: advance the tweak, then the same XOR-and-park sequence */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) _aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) movdqa IV, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) movdqu 0x10(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) pxor INC, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) movdqu IV, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) /* block 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) _aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) movdqa IV, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) movdqu 0x20(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) pxor INC, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) movdqu IV, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) /* block 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) _aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) movdqa IV, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) movdqu 0x30(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) pxor INC, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) movdqu IV, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) /* NOTE(review): _aesni_enc4 is defined outside this section; presumably it encrypts STATE1-STATE4 in place with the key schedule at KEYP - confirm. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) call _aesni_enc4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) /* Second XOR: reload each parked tweak from the output buffer, XOR it into the cipher result, and overwrite the slot with the finished block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) movdqu 0x00(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756) pxor INC, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) movdqu STATE1, 0x00(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759) movdqu 0x10(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) pxor INC, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) movdqu STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) movdqu 0x20(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) pxor INC, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) movdqu STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) movdqu 0x30(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768) pxor INC, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) movdqu STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) /* Advance the tweak once more so IV is ready for the next group, or for the write-back below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) _aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) ja .Lxts_enc_loop4 /* unsigned test on the sub above: loop only if it did not borrow and the result is non-zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) /* Store the next tweak back through IVP. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) movups IV, (IVP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) SYM_FUNC_END(aesni_xts_encrypt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) * void aesni_xts_decrypt(const struct crypto_aes_ctx *ctx, u8 *dst,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) * const u8 *src, unsigned int len, le128 *iv)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) SYM_FUNC_START(aesni_xts_decrypt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) /* Set-up: load the mask used by _aesni_gf128mul_x_ble() and the starting tweak from (IVP). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) movdqa .Lgf128mul_x_ble_mask, GF128MUL_MASK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) movups (IVP), IV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) /* KLEN is read from offset 480 before KEYP is moved. NOTE(review): offset 480 is presumably crypto_aes_ctx.key_length - confirm against the struct layout. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794) mov 480(KEYP), KLEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) add $240, KEYP /* NOTE(review): presumably steps KEYP to the decryption key schedule inside the context - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) /* Each pass handles four 16-byte blocks (64 bytes). The first pass runs before LEN is examined, so a full 64-byte group is always read and written; NOTE(review): looks like len must be a non-zero multiple of 64 - confirm against callers. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) .Lxts_dec_loop4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) movdqa IV, STATE1 /* block 0: start from the current tweak */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) movdqu 0x00(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) pxor INC, STATE1 /* STATE1 = input block 0 ^ tweak */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) movdqu IV, 0x00(OUTP) /* park the tweak in the output slot; it is reloaded after the cipher call */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) /* block 1: advance the tweak, then the same XOR-and-park sequence */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) _aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) movdqa IV, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805) movdqu 0x10(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) pxor INC, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) movdqu IV, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) /* block 2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) _aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) movdqa IV, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) movdqu 0x20(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) pxor INC, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) movdqu IV, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) /* block 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) _aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) movdqa IV, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) movdqu 0x30(INP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) pxor INC, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) movdqu IV, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) /* NOTE(review): _aesni_dec4 is defined outside this section; presumably it decrypts STATE1-STATE4 in place with the key schedule at KEYP - confirm. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) call _aesni_dec4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) /* Second XOR: reload each parked tweak from the output buffer, XOR it into the cipher result, and overwrite the slot with the finished block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) movdqu 0x00(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) pxor INC, STATE1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) movdqu STATE1, 0x00(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) movdqu 0x10(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828) pxor INC, STATE2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) movdqu STATE2, 0x10(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) movdqu 0x20(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) pxor INC, STATE3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833) movdqu STATE3, 0x20(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) movdqu 0x30(OUTP), INC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) pxor INC, STATE4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) movdqu STATE4, 0x30(OUTP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) /* Advance the tweak once more so IV is ready for the next group, or for the write-back below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) _aesni_gf128mul_x_ble()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) add $64, INP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) add $64, OUTP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) sub $64, LEN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844) ja .Lxts_dec_loop4 /* unsigned test on the sub above: loop only if it did not borrow and the result is non-zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) /* Store the next tweak back through IVP. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) movups IV, (IVP)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) SYM_FUNC_END(aesni_xts_decrypt)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) #endif