^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Hardware-accelerated CRC-32 variants for Linux on z Systems
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Use the z/Architecture Vector Extension Facility to accelerate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * computing of bitreflected CRC-32 checksums for IEEE 802.3 Ethernet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * and Castagnoli.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * This CRC-32 implementation algorithm is bitreflected and processes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * the least-significant bit first (Little-Endian).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * Copyright IBM Corp. 2015
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * Author(s): Hendrik Brueckner <brueckner@linux.vnet.ibm.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #include <asm/nospec-insn.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #include <asm/vx-insn.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) /* Vector register range containing CRC-32 constants */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #define CONST_PERM_LE2BE %v9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define CONST_R2R1 %v10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define CONST_R4R3 %v11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define CONST_R5 %v12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define CONST_RU_POLY %v13
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #define CONST_CRC_POLY %v14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) .data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) .align 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) * The CRC-32 constant block contains reduction constants to fold and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) * process particular chunks of the input data stream in parallel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) * For the CRC-32 variants, the constants are precomputed according to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) * these definitions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) * R1 = [(x^(4*128+32) mod P'(x) << 32)]' << 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * R2 = [(x^(4*128-32) mod P'(x) << 32)]' << 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * R3 = [(x^(128+32) mod P'(x) << 32)]' << 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * R4 = [(x^(128-32) mod P'(x) << 32)]' << 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * R5 = [(x^64 mod P'(x) << 32)]' << 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * R6 = [(x^32 mod P'(x) << 32)]' << 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * The bitreflected Barrett reduction constant, u', is defined as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * the bit reversal of floor(x**64 / P(x)).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * where P(x) is the polynomial in the normal domain and the P'(x) is the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) * polynomial in the reversed (bitreflected) domain.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) * CRC-32 (IEEE 802.3 Ethernet, ...) polynomials:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) * P(x) = 0x04C11DB7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) * P'(x) = 0xEDB88320
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * CRC-32C (Castagnoli) polynomials:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * P(x) = 0x1EDC6F41
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) * P'(x) = 0x82F63B78
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) .Lconstants_CRC_32_LE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE byte-permute mask for VPERM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) .quad 0x1c6e41596, 0x154442bd4 # R2, R1 - fold-by-64-byte constants
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) .quad 0x0ccaa009e, 0x1751997d0 # R4, R3 - fold-by-16-byte constants
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) .octa 0x163cd6124 # R5 (rightmost doubleword; leftmost is zero)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) .octa 0x1F7011641 # u' - bitreflected Barrett reduction constant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) .octa 0x1DB710641 # P'(x) << 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) .Lconstants_CRC_32C_LE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) .octa 0x0F0E0D0C0B0A09080706050403020100 # BE->LE byte-permute mask for VPERM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) .quad 0x09e4addf8, 0x740eef02 # R2, R1 - fold-by-64-byte constants
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) .quad 0x14cd00bd6, 0xf20c0dfe # R4, R3 - fold-by-16-byte constants
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) .octa 0x0dd45aab8 # R5 (rightmost doubleword; leftmost is zero)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) .octa 0x0dea713f1 # u' - bitreflected Barrett reduction constant
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) .octa 0x105ec76f0 # P'(x) << 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) .previous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) GEN_BR_THUNK %r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) * The CRC-32 functions use these calling conventions:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) * Parameters:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) * %r2: Initial CRC value, typically ~0; and final CRC (return) value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) * %r3: Input buffer pointer, performance might be improved if the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) * buffer is on a doubleword boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) * %r4: Length of the buffer, must be 64 bytes or greater.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) * Register usage:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) * %r5: CRC-32 constant pool base pointer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) * V0: Initial CRC value and intermediate constants and results.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) * V1..V4: Data for CRC computation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) * V5..V8: Next data chunks that are fetched from the input buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) * V9: Constant for BE->LE conversion and shift operations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) * V10..V14: CRC-32 constants.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) ENTRY(crc32_le_vgfm_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) larl %r5,.Lconstants_CRC_32_LE /* %r5 = CRC-32 (IEEE 802.3) constant block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) j crc32_le_vgfm_generic /* Tail-branch into the shared implementation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) ENDPROC(crc32_le_vgfm_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) ENTRY(crc32c_le_vgfm_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) larl %r5,.Lconstants_CRC_32C_LE /* %r5 = CRC-32C (Castagnoli) constant block */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) j crc32_le_vgfm_generic /* Tail-branch into the shared implementation */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) ENDPROC(crc32c_le_vgfm_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) ENTRY(crc32_le_vgfm_generic)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) /* Load CRC-32 constants (V9..V14) from the block selected via %r5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) VLM CONST_PERM_LE2BE,CONST_CRC_POLY,0,%r5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * Load the initial CRC value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) * The CRC value is loaded into the rightmost word of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) * vector register and is later XORed with the LSB portion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) * of the loaded input data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) VZERO %v0 /* Clear V0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) VLVGF %v0,%r2,3 /* Load CRC into rightmost word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) /* Load a 64-byte data chunk and XOR with CRC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) VLM %v1,%v4,0,%r3 /* 64-bytes into V1..V4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) VPERM %v1,%v1,%v1,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) VPERM %v3,%v3,%v3,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) VPERM %v4,%v4,%v4,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) VX %v1,%v0,%v1 /* V1 ^= CRC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) aghi %r3,64 /* BUF = BUF + 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) aghi %r4,-64 /* LEN = LEN - 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) cghi %r4,64 /* Enough data for another 64-byte fold? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) jl .Lless_than_64bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) .Lfold_64bytes_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) /* Load the next 64-byte data chunk into V5 to V8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) VLM %v5,%v8,0,%r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) VPERM %v5,%v5,%v5,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) VPERM %v6,%v6,%v6,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) VPERM %v7,%v7,%v7,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) VPERM %v8,%v8,%v8,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) * Perform a GF(2) multiplication of the doublewords in V1 with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) * the R1 and R2 reduction constants in V0. The intermediate result
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) * is then folded (accumulated) with the next data chunk in V5 and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) * stored in V1. Repeat this step for the register contents
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) * in V2, V3, and V4 respectively.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) VGFMAG %v1,CONST_R2R1,%v1,%v5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) VGFMAG %v2,CONST_R2R1,%v2,%v6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) VGFMAG %v3,CONST_R2R1,%v3,%v7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) VGFMAG %v4,CONST_R2R1,%v4,%v8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) aghi %r3,64 /* BUF = BUF + 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) aghi %r4,-64 /* LEN = LEN - 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) cghi %r4,64 /* At least 64 bytes of data left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) jnl .Lfold_64bytes_loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) .Lless_than_64bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) * Fold V1 to V4 into a single 128-bit value in V1. Multiply V1 with R3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) * and R4 and accumulating the next 128-bit chunk until a single 128-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) * value remains.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) VGFMAG %v1,CONST_R4R3,%v1,%v2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) VGFMAG %v1,CONST_R4R3,%v1,%v3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) VGFMAG %v1,CONST_R4R3,%v1,%v4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) cghi %r4,16 /* Enough data for a 16-byte fold? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) jl .Lfinal_fold
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) .Lfold_16bytes_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) VL %v2,0,,%r3 /* Load next data chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) VPERM %v2,%v2,%v2,CONST_PERM_LE2BE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) VGFMAG %v1,CONST_R4R3,%v1,%v2 /* Fold next data chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) aghi %r3,16 /* BUF = BUF + 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) aghi %r4,-16 /* LEN = LEN - 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) cghi %r4,16 /* At least 16 bytes of data left? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) jnl .Lfold_16bytes_loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) .Lfinal_fold:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) * Set up a vector register for byte shifts. The shift value must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) * be loaded in bits 1-4 in byte element 7 of a vector register.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) * Shift by 8 bytes: 0x40
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * Shift by 4 bytes: 0x20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) VLEIB %v9,0x40,7 /* Shift value: 8 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) * Prepare V0 for the next GF(2) multiplication: shift V0 by 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) * to move R4 into the rightmost doubleword and set the leftmost
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) * doubleword to 0x1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) VSRLB %v0,CONST_R4R3,%v9 /* Move R4 into rightmost doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) VLEIG %v0,1,0 /* Leftmost doubleword = 0x1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) * Compute GF(2) product of V1 and V0. The rightmost doubleword
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) * of V1 is multiplied with R4. The leftmost doubleword of V1 is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) * multiplied by 0x1 and is then XORed with rightmost product.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) * Implicitly, the intermediate leftmost product becomes padded with zeroes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) VGFMG %v1,%v0,%v1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) * Now do the final 32-bit fold by multiplying the rightmost word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) * in V1 with R5 and XOR the result with the remaining bits in V1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) * To achieve this by a single VGFMAG, right shift V1 by a word
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * and store the result in V2 which is then accumulated. Use the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) * vector unpack instruction to load the rightmost half of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) * doubleword into the rightmost doubleword element of V1; the other
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * half is loaded in the leftmost doubleword.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) * The vector register with CONST_R5 contains the R5 constant in the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) * rightmost doubleword and the leftmost doubleword is zero to ignore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) * the leftmost product of V1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) VLEIB %v9,0x20,7 /* Shift by words */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) VSRLB %v2,%v1,%v9 /* Store remaining bits in V2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) VUPLLF %v1,%v1 /* Split rightmost doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) VGFMAG %v1,CONST_R5,%v1,%v2 /* V1 = (V1 * R5) XOR V2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) * Apply a Barrett reduction to compute the final 32-bit CRC value.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) * The input values to the Barrett reduction are the degree-63 polynomial
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) * in V1 (R(x)), degree-32 generator polynomial, and the reduction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) * constant u. The Barrett reduction result is the CRC value of R(x) mod
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) * P(x).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) * The Barrett reduction algorithm is defined as:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) * 1. T1(x) = floor( R(x) / x^32 ) GF2MUL u
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) * 2. T2(x) = floor( T1(x) / x^32 ) GF2MUL P(x)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) * 3. C(x) = R(x) XOR T2(x) mod x^32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) * Note: The leftmost doubleword of vector register containing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) * CONST_RU_POLY is zero and, thus, the intermediate GF(2) product
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) * is zero and does not contribute to the final result.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) /* T1(x) = floor( R(x) / x^32 ) GF2MUL u */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) VUPLLF %v2,%v1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) VGFMG %v2,CONST_RU_POLY,%v2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) * Compute the GF(2) product of the CRC polynomial with T1(x) in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) * V2 and XOR the intermediate result, T2(x), with the value in V1.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) * The final result is stored in word element 2 of V2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) VUPLLF %v2,%v2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) VGFMAG %v2,CONST_CRC_POLY,%v2,%v1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) .Ldone:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) VLGVF %r2,%v2,2 /* Extract 32-bit CRC (word element 2) into %r2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) BR_EX %r14 /* Return via expolined branch (spectre-safe) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) ENDPROC(crc32_le_vgfm_generic)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) .previous