^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Accelerated GHASH implementation using the Intel PCLMULQDQ-NI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * instructions. This file contains the accelerated part of the GHASH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * implementation. More information about PCLMULQDQ can be found at:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * http://software.intel.com/en-us/articles/carry-less-multiplication-and-its-usage-for-computing-the-gcm-mode/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * Copyright (c) 2009 Intel Corp.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Author: Huang Ying <ying.huang@intel.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * Vinodh Gopal
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * Erdinc Ozturk
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * Deniz Karakoyunlu
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <asm/frame.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) .section .rodata.cst16.bswap_mask, "aM", @progbits, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) .balign 16 # 16-byte aligned: the mask is loaded with movaps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) .Lbswap_mask: # pshufb control bytes: output byte i = input byte 15 - i
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define DATA %xmm0 /* operand1 and result of __clmul_gf128mul_ble; value loaded from / stored to dst */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define SHASH %xmm1 /* operand2 of __clmul_gf128mul_ble; loaded from the shash argument */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #define T1 %xmm2 /* temporary, changed by __clmul_gf128mul_ble */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #define T2 %xmm3 /* temporary, changed by __clmul_gf128mul_ble */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #define T3 %xmm4 /* temporary, changed by __clmul_gf128mul_ble */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define BSWAP %xmm5 /* holds .Lbswap_mask, the pshufb byte-reversal mask */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define IN1 %xmm6 /* 16-byte input block loaded from src in clmul_ghash_update */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * __clmul_gf128mul_ble: internal ABI (operands are passed in xmm registers)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * DATA: operand1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * SHASH: operand2, hash_key << 1 mod poly (only read, never written here)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * DATA: operand1 * operand2 mod poly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * changed (no xmm register other than DATA and these is written):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) * T3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) SYM_FUNC_START_LOCAL(__clmul_gf128mul_ble)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) movaps DATA, T1 # T1 = copy of operand1, used for the high product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) pshufd $0b01001110, DATA, T2 # T2 = DATA with its 64-bit halves swapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) pshufd $0b01001110, SHASH, T3 # T3 = SHASH with its 64-bit halves swapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) pxor DATA, T2 # T2 = a1 + a0 in both halves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) pxor SHASH, T3 # T3 = b1 + b0 in both halves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) pclmulqdq $0x00, SHASH, DATA # DATA = a0 * b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) pclmulqdq $0x11, SHASH, T1 # T1 = a1 * b1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) pclmulqdq $0x00, T3, T2 # T2 = (a1 + a0) * (b1 + b0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) pxor DATA, T2 # cancel the a0 * b0 term
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) pxor T1, T2 # T2 = a0 * b1 + a1 * b0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) movaps T2, T3 # split the middle term over the two result halves
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) pslldq $8, T3 # T3 = middle term shifted left by 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) psrldq $8, T2 # T2 = middle term shifted right by 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) pxor T3, DATA # low qword of middle term into high qword of DATA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) pxor T2, T1 # <T1:DATA> is result of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) # carry-less multiplication
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) # first phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) movaps DATA, T3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) psllq $1, T3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) pxor DATA, T3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) psllq $5, T3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) pxor DATA, T3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) psllq $57, T3 # per qword: T3 = DATA << 63 ^ DATA << 62 ^ DATA << 57
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) movaps T3, T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) pslldq $8, T2 # T2 = low qword of T3 moved to the high qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) psrldq $8, T3 # T3 = high qword of T3 moved to the low qword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) pxor T2, DATA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) pxor T3, T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) # second phase of the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) movaps DATA, T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) psrlq $5, T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) pxor DATA, T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) psrlq $1, T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) pxor DATA, T2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) psrlq $1, T2 # per qword: T2 = DATA >> 1 ^ DATA >> 2 ^ DATA >> 7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) pxor T2, T1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) pxor T1, DATA # DATA = reduced 128-bit result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) SYM_FUNC_END(__clmul_gf128mul_ble)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) /* void clmul_ghash_mul(char *dst, const u128 *shash): multiply the 16 bytes at dst by shash, in place */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) SYM_FUNC_START(clmul_ghash_mul)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) movups (%rdi), DATA # DATA = 16 bytes at dst (unaligned load)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) movups (%rsi), SHASH # SHASH = 16 bytes at shash
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) movaps .Lbswap_mask(%rip), BSWAP # RIP-relative, no absolute relocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) pshufb BSWAP, DATA # reverse the byte order before multiplying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) call __clmul_gf128mul_ble # DATA = DATA * SHASH mod poly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) pshufb BSWAP, DATA # reverse the bytes back to memory order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) movups DATA, (%rdi) # store the product back to dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) SYM_FUNC_END(clmul_ghash_mul)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) * void clmul_ghash_update(char *dst, const char *src, unsigned int srclen,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) * const u128 *shash);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) * Folds each full 16-byte block of src into the value at dst; a trailing partial block is ignored. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) SYM_FUNC_START(clmul_ghash_update)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) FRAME_BEGIN
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) cmp $16, %rdx # NOTE(review): srclen is unsigned int but all of %rdx is compared; confirm callers zero-extend
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) jb .Lupdate_just_ret # check length: less than one block, dst is left untouched
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) movaps .Lbswap_mask(%rip), BSWAP # RIP-relative, no absolute relocation
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) movups (%rdi), DATA # DATA = current 16-byte value at dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) movups (%rcx), SHASH # SHASH = 16 bytes at shash
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) pshufb BSWAP, DATA # reverse the byte order before multiplying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) .Lupdate_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) movups (%rsi), IN1 # IN1 = next 16-byte block of src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) pshufb BSWAP, IN1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) pxor IN1, DATA # xor the block into the running value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) call __clmul_gf128mul_ble # DATA = DATA * SHASH mod poly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) sub $16, %rdx # one block consumed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) add $16, %rsi # advance src to the next block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) cmp $16, %rdx
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) jae .Lupdate_loop # unsigned test, consistent with the jb check at entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) pshufb BSWAP, DATA # reverse the bytes back to memory order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) movups DATA, (%rdi) # store the updated value to dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) .Lupdate_just_ret:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) FRAME_END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) SYM_FUNC_END(clmul_ghash_update)