^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* GPL HEADER START
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * This program is free software; you can redistribute it and/or modify
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * it under the terms of the GNU General Public License version 2 only,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * as published by the Free Software Foundation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * This program is distributed in the hope that it will be useful, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * WITHOUT ANY WARRANTY; without even the implied warranty of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * General Public License version 2 for more details (a copy is included
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * in the LICENSE file that accompanied this code).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * You should have received a copy of the GNU General Public License
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * version 2 along with this program; If not, see http://www.gnu.org/licenses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * Please visit http://www.xyratex.com/contact if you need additional
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * information or have any questions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * GPL HEADER END
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * Copyright 2012 Xyratex Technology Limited
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * calculation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) * at:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * http://www.intel.com/products/processor/manuals/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) * Volume 2B: Instruction Set Reference, N-Z
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * Authors: Gregory Prestas <Gregory_Prestas@us.xyratex.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) * Alexander Boyko <Alexander_Boyko@xyratex.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
.section .rodata
.align 16
/*
 * Precomputed folding constants for the bit-reflected CRC32 polynomial
 * 0xEDB88320 (LE).  The ' marks bit reflection; the trailing << 1 matches
 * the PCLMULQDQ carry-less multiply convention.
 *
 * [(x4*128+32 mod P(x) << 32)]' << 1 = 0x154442bd4
 * #define CONSTANT_R1 0x154442bd4LL
 *
 * [(x4*128-32 mod P(x) << 32)]' << 1 = 0x1c6e41596
 * #define CONSTANT_R2 0x1c6e41596LL
 */
.Lconstant_R2R1:
	.octa 0x00000001c6e415960000000154442bd4
/*
 * [(x128+32 mod P(x) << 32)]' << 1 = 0x1751997d0
 * #define CONSTANT_R3 0x1751997d0LL
 *
 * [(x128-32 mod P(x) << 32)]' << 1 = 0x0ccaa009e
 * #define CONSTANT_R4 0x0ccaa009eLL
 */
.Lconstant_R4R3:
	.octa 0x00000000ccaa009e00000001751997d0
/*
 * [(x64 mod P(x) << 32)]' << 1 = 0x163cd6124
 * #define CONSTANT_R5 0x163cd6124LL
 */
.Lconstant_R5:
	.octa 0x00000000000000000000000163cd6124
/* Mask selecting the low 32 bits of a 64-bit lane. */
.Lconstant_mask32:
	.octa 0x000000000000000000000000FFFFFFFF
/*
 * Low qword: full (33-bit) reflected polynomial.
 * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
 *
 * High qword: Barrett reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
 * #define CONSTANT_RU 0x1F7011641LL
 */
.Lconstant_RUpoly:
	.octa 0x00000001F701164100000001DB710641
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
/* xmm0 doubles as the fold-constant register and the initial-CRC carrier. */
#define CONSTANT %xmm0

#ifdef __x86_64__
/* SysV AMD64: arg1 = rdi, arg2 = rsi, arg3 = edx. */
#define BUF %rdi
#define LEN %rsi
#define CRC %edx
#else
/* 32-bit regparm(3): arg1 = eax, arg2 = edx, arg3 = ecx. */
#define BUF %eax
#define LEN %edx
#define CRC %ecx
#endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93)
.text
/**
 * Calculate crc32 (LE polynomial 0xEDB88320) using PCLMULQDQ folding.
 * BUF - buffer (16 bytes aligned; movdqa faults on unaligned data)
 * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
 * CRC - initial crc32
 * return %eax crc32
 * uint crc32_pclmul_le_16(unsigned char const *buffer,
 *	                 size_t len, uint crc32)
 *
 * Register use: xmm1-xmm4 hold the running 4x128-bit fold state,
 * xmm5-xmm7 (and xmm8 on x86_64) are scratch.  No callee-saved
 * registers are touched; no stack is used.
 */

SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
	movdqa  (BUF), %xmm1
	movdqa  0x10(BUF), %xmm2
	movdqa  0x20(BUF), %xmm3
	movdqa  0x30(BUF), %xmm4
	movd    CRC, CONSTANT
	pxor    CONSTANT, %xmm1		/* fold the initial CRC into byte 0..3 */
	sub     $0x40, LEN
	add     $0x40, BUF
	cmp     $0x40, LEN		/* LEN is unsigned (size_t) */
	jb      .Lless_64

#ifdef __x86_64__
	movdqa .Lconstant_R2R1(%rip), CONSTANT
#else
	movdqa .Lconstant_R2R1, CONSTANT
#endif

.Lloop_64:/* 64 bytes Full cache line folding */
	prefetchnta    0x40(BUF)
	movdqa  %xmm1, %xmm5
	movdqa  %xmm2, %xmm6
	movdqa  %xmm3, %xmm7
#ifdef __x86_64__
	movdqa  %xmm4, %xmm8
#endif
	/* fold low qwords by R1, high qwords by R2, then recombine */
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x00, CONSTANT, %xmm2
	pclmulqdq $0x00, CONSTANT, %xmm3
#ifdef __x86_64__
	pclmulqdq $0x00, CONSTANT, %xmm4
#endif
	pclmulqdq $0x11, CONSTANT, %xmm5
	pclmulqdq $0x11, CONSTANT, %xmm6
	pclmulqdq $0x11, CONSTANT, %xmm7
#ifdef __x86_64__
	pclmulqdq $0x11, CONSTANT, %xmm8
#endif
	pxor    %xmm5, %xmm1
	pxor    %xmm6, %xmm2
	pxor    %xmm7, %xmm3
#ifdef __x86_64__
	pxor    %xmm8, %xmm4
#else
	/* xmm8 unsupported for x32 */
	movdqa  %xmm4, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm4
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm4
#endif

	pxor    (BUF), %xmm1		/* xor in the next cache line */
	pxor    0x10(BUF), %xmm2
	pxor    0x20(BUF), %xmm3
	pxor    0x30(BUF), %xmm4

	sub     $0x40, LEN
	add     $0x40, BUF
	cmp     $0x40, LEN
	jae     .Lloop_64		/* unsigned: LEN is a size_t */
.Lless_64:/* Folding cache line into 128bit */
#ifdef __x86_64__
	movdqa .Lconstant_R4R3(%rip), CONSTANT
#else
	movdqa .Lconstant_R4R3, CONSTANT
#endif
	prefetchnta     (BUF)

	/* fold xmm1..xmm4 pairwise down into a single 128-bit value in xmm1 */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm2, %xmm1

	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm3, %xmm1

	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    %xmm4, %xmm1

	cmp     $0x10, LEN
	jb      .Lfold_64
.Lloop_16:/* Folding rest buffer into 128bit */
	movdqa  %xmm1, %xmm5
	pclmulqdq $0x00, CONSTANT, %xmm1
	pclmulqdq $0x11, CONSTANT, %xmm5
	pxor    %xmm5, %xmm1
	pxor    (BUF), %xmm1
	sub     $0x10, LEN
	add     $0x10, BUF
	cmp     $0x10, LEN
	jae     .Lloop_16		/* unsigned: LEN is a size_t */

.Lfold_64:
	/* perform the last 64 bit fold, also adds 32 zeroes
	 * to the input stream */
	pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
	psrldq  $0x08, %xmm1
	pxor    CONSTANT, %xmm1

	/* final 32-bit fold */
	movdqa  %xmm1, %xmm2
#ifdef __x86_64__
	movdqa .Lconstant_R5(%rip), CONSTANT
	movdqa .Lconstant_mask32(%rip), %xmm3
#else
	movdqa .Lconstant_R5, CONSTANT
	movdqa .Lconstant_mask32, %xmm3
#endif
	psrldq  $0x04, %xmm2
	pand    %xmm3, %xmm1
	pclmulqdq $0x00, CONSTANT, %xmm1
	pxor    %xmm2, %xmm1

	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
#ifdef __x86_64__
	movdqa .Lconstant_RUpoly(%rip), CONSTANT
#else
	movdqa .Lconstant_RUpoly, CONSTANT
#endif
	movdqa  %xmm1, %xmm2
	pand    %xmm3, %xmm1
	pclmulqdq $0x10, CONSTANT, %xmm1 /* * RU (Barrett constant) */
	pand    %xmm3, %xmm1
	pclmulqdq $0x00, CONSTANT, %xmm1 /* * polynomial */
	pxor    %xmm2, %xmm1
	pextrd  $0x01, %xmm1, %eax	/* result CRC in dword 1 */

	/* NOTE(review): kernels with straight-line-speculation mitigation
	 * (v5.17+) require the RET macro here instead of a bare ret —
	 * confirm against the target tree before changing. */
	ret
SYM_FUNC_END(crc32_pclmul_le_16)