/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * This file contains assembly-language implementations
 * of IP-style 1's complement checksum routines.
 *
 * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au).
 */

#include <linux/sys.h>
#include <asm/processor.h>
#include <asm/errno.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

/*
 * Computes the checksum of a memory block at buff, length len,
 * and adds in "sum" (32-bit).
 *
 * __csum_partial(r3=buff, r4=len, r5=sum)
 */
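/*
 * Roughly, in C (an illustrative sketch only, not part of the build):
 * doublewords are accumulated with adde so each carry is folded back
 * in, and the 64-bit total is then folded to 32 bits:
 *
 *	u64 s = sum;
 *	for (each doubleword w of buff)
 *		s += w + carry_from_previous_add;
 *	return (u32)((s + rol64(s, 32)) >> 32);
 */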
_GLOBAL(__csum_partial)
	addic	r0,r5,0			/* clear carry */

	srdi.	r6,r4,3			/* less than 8 bytes? */
	beq	.Lcsum_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcsum_aligned

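	/*
	 * r6 holds (r3 >> 1) & 3, which is non-zero here, so 4 - r6 is
	 * the number of halfwords (1..3) to sum before r3 reaches
	 * 8-byte alignment.
	 */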
	li	r7,4
	sub	r6,r7,r6
	mtctr	r6

1:
	lhz	r6,0(r3)		/* align to doubleword */
	subi	r4,r4,2
	addi	r3,r3,2
	adde	r0,r0,r6
	bdnz	1b

.Lcsum_aligned:
	/*
	 * The loop below is unrolled to process 64 bytes per iteration.
	 * The first half of the initial block is preloaded ahead of the
	 * loop and the final 64-byte block is summed after it, so this
	 * path requires a minimum length of 128 bytes.
	 */
	srdi.	r6,r4,7
	beq	.Lcsum_tail_doublewords	/* len < 128 */

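	/* ctr = len/64 - 1: the last 64-byte block is summed after the loop */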
	srdi	r6,r4,6
	subi	r6,r6,1
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

	ld	r6,0(r3)
	ld	r9,8(r3)

	ld	r10,16(r3)
	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
	 * because of the XER dependency. This means the fastest this loop can
	 * go is 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
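	/*
	 * The loads are interleaved between the adde instructions so
	 * their latency is hidden behind the serialized carry chain
	 * through XER.
	 */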
	.align	5
2:
	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10

	adde	r0,r0,r11

	adde	r0,r0,r12

	adde	r0,r0,r14

	adde	r0,r0,r15
	ld	r6,0(r3)
	ld	r9,8(r3)

	adde	r0,r0,r16
	ld	r10,16(r3)
	ld	r11,24(r3)
	bdnz	2b


	adde	r0,r0,r6
	ld	r12,32(r3)
	ld	r14,40(r3)

	adde	r0,r0,r9
	ld	r15,48(r3)
	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
	adde	r0,r0,r11
	adde	r0,r0,r12
	adde	r0,r0,r14
	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r4,r4,63

.Lcsum_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r4,3
	beq	.Lcsum_tail_word

	mtctr	r6
3:
	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
	bdnz	3b

	andi.	r4,r4,7

.Lcsum_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r4,2
	beq	.Lcsum_tail_halfword

	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
	subi	r4,r4,4

.Lcsum_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r4,1
	beq	.Lcsum_tail_byte

	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
	subi	r4,r4,2

.Lcsum_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r4,1
	beq	.Lcsum_finish

	lbz	r6,0(r3)
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif

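	/*
	 * Rotating by 32 swaps the halves of r0, so the high word of
	 * r4 + r0 below is hi + lo with the carry out of the low word
	 * folded in; the final shift returns that 32-bit folded sum.
	 */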
.Lcsum_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr
EXPORT_SYMBOL(__csum_partial)


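/*
 * Each macro below plants a numbered local label and an exception
 * table entry for it, so a fault in the load or store that follows
 * the macro branches to the given fixup. The source/dest variants
 * are used inside the region where r14-r16 and the stack frame are
 * live (.Lerror restores them); the srcnr/dstnr variants are used
 * outside it and go straight to .Lerror_nr.
 */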
.macro srcnr
100:
	EX_TABLE(100b,.Lerror_nr)
.endm

.macro source
150:
	EX_TABLE(150b,.Lerror)
.endm

.macro dstnr
200:
	EX_TABLE(200b,.Lerror_nr)
.endm

.macro dest
250:
	EX_TABLE(250b,.Lerror)
.endm

/*
 * Computes the checksum of a memory block at src, length len,
 * and adds in 0xffffffff (32-bit), while copying the block to dst.
 * If an access exception occurs, it returns 0.
 *
 * csum_partial_copy_generic(r3=src, r4=dst, r5=len)
 */
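/*
 * Roughly, in C (an illustrative sketch only, not part of the build):
 * copy src to dst while accumulating the ones' complement sum, with
 * any faulting access diverted to the fixups below:
 *
 *	u64 s = 0xffffffff;
 *	for (each doubleword w of src)
 *		s += w + carry, store w to dst;
 *	return fault ? 0 : (u32)((s + rol64(s, 32)) >> 32);
 */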
_GLOBAL(csum_partial_copy_generic)
	li	r6,-1
	addic	r0,r6,0			/* clear carry */

	srdi.	r6,r5,3			/* less than 8 bytes? */
	beq	.Lcopy_tail_word

	/*
	 * If only halfword aligned, align to a double word. Since odd
	 * aligned addresses should be rare and they would require more
	 * work to calculate the correct checksum, we ignore that case
	 * and take the potential slowdown of unaligned loads.
	 *
	 * If the source and destination are relatively unaligned we only
	 * align the source. This keeps things simple.
	 */
	rldicl. r6,r3,64-1,64-2		/* r6 = (r3 >> 1) & 0x3 */
	beq	.Lcopy_aligned

	li	r9,4
	sub	r6,r9,r6
	mtctr	r6

1:
srcnr;	lhz	r6,0(r3)		/* align to doubleword */
	subi	r5,r5,2
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	bdnz	1b

.Lcopy_aligned:
	/*
	 * The loop below is unrolled to process 64 bytes per iteration.
	 * The first half of the initial block is preloaded ahead of the
	 * loop and the final 64-byte block is copied and summed after
	 * it, so this path requires a minimum length of 128 bytes.
	 */
	srdi.	r6,r5,7
	beq	.Lcopy_tail_doublewords	/* len < 128 */

	srdi	r6,r5,6
	subi	r6,r6,1
	mtctr	r6

	stdu	r1,-STACKFRAMESIZE(r1)
	std	r14,STK_REG(R14)(r1)
	std	r15,STK_REG(R15)(r1)
	std	r16,STK_REG(R16)(r1)

source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

source;	ld	r10,16(r3)
source;	ld	r11,24(r3)

	/*
	 * On POWER6 and POWER7 back to back adde instructions take 2 cycles
	 * because of the XER dependency. This means the fastest this loop can
	 * go is 16 cycles per iteration. The scheduling of the loop below has
	 * been shown to hit this on both POWER6 and POWER7.
	 */
	.align	5
2:
	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
source;	ld	r6,0(r3)
source;	ld	r9,8(r3)

	adde	r0,r0,r16
source;	ld	r10,16(r3)
source;	ld	r11,24(r3)
	bdnz	2b


	adde	r0,r0,r6
source;	ld	r12,32(r3)
source;	ld	r14,40(r3)

	adde	r0,r0,r9
source;	ld	r15,48(r3)
source;	ld	r16,56(r3)
	addi	r3,r3,64

	adde	r0,r0,r10
dest;	std	r6,0(r4)
dest;	std	r9,8(r4)

	adde	r0,r0,r11
dest;	std	r10,16(r4)
dest;	std	r11,24(r4)

	adde	r0,r0,r12
dest;	std	r12,32(r4)
dest;	std	r14,40(r4)

	adde	r0,r0,r14
dest;	std	r15,48(r4)
dest;	std	r16,56(r4)
	addi	r4,r4,64

	adde	r0,r0,r15
	adde	r0,r0,r16

	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE

	andi.	r5,r5,63

.Lcopy_tail_doublewords:		/* Up to 127 bytes to go */
	srdi.	r6,r5,3
	beq	.Lcopy_tail_word

	mtctr	r6
3:
srcnr;	ld	r6,0(r3)
	addi	r3,r3,8
	adde	r0,r0,r6
dstnr;	std	r6,0(r4)
	addi	r4,r4,8
	bdnz	3b

	andi.	r5,r5,7

.Lcopy_tail_word:			/* Up to 7 bytes to go */
	srdi.	r6,r5,2
	beq	.Lcopy_tail_halfword

srcnr;	lwz	r6,0(r3)
	addi	r3,r3,4
	adde	r0,r0,r6
dstnr;	stw	r6,0(r4)
	addi	r4,r4,4
	subi	r5,r5,4

.Lcopy_tail_halfword:			/* Up to 3 bytes to go */
	srdi.	r6,r5,1
	beq	.Lcopy_tail_byte

srcnr;	lhz	r6,0(r3)
	addi	r3,r3,2
	adde	r0,r0,r6
dstnr;	sth	r6,0(r4)
	addi	r4,r4,2
	subi	r5,r5,2

.Lcopy_tail_byte:			/* Up to 1 byte to go */
	andi.	r6,r5,1
	beq	.Lcopy_finish

srcnr;	lbz	r6,0(r3)
#ifdef __BIG_ENDIAN__
	sldi	r9,r6,8			/* Pad the byte out to 16 bits */
	adde	r0,r0,r9
#else
	adde	r0,r0,r6
#endif
dstnr;	stb	r6,0(r4)

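	/* Same 64-bit to 32-bit fold as .Lcsum_finish above. */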
.Lcopy_finish:
	addze	r0,r0			/* add in final carry */
	rldicl	r4,r0,32,0		/* fold two 32 bit halves together */
	add	r3,r4,r0
	srdi	r3,r3,32
	blr

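/*
 * Exception fixups: .Lerror is reached from faults inside the
 * unrolled loop, where the stack frame and r14-r16 are live, so it
 * restores them before falling through to .Lerror_nr, which returns
 * 0 as the function's error indication.
 */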
.Lerror:
	ld	r14,STK_REG(R14)(r1)
	ld	r15,STK_REG(R15)(r1)
	ld	r16,STK_REG(R16)(r1)
	addi	r1,r1,STACKFRAMESIZE
.Lerror_nr:
	li	r3,0
	blr

EXPORT_SYMBOL(csum_partial_copy_generic)

/*
 * __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 *			    const struct in6_addr *daddr,
 *			    __u32 len, __u8 proto, __wsum sum)
 */
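/*
 * Computes the IPv6 pseudo-header checksum: the ones' complement sum
 * of the two 128-bit addresses plus len, proto and the incoming sum,
 * folded to 16 bits and complemented. Roughly, in C (an illustrative
 * sketch only, not part of the build), with carries folded back in:
 *
 *	u64 s = saddr64[0] + saddr64[1] + daddr64[0] + daddr64[1]
 *		+ len + proto + sum;
 *	return ~fold16(fold32(s)) & 0xffff;
 */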

_GLOBAL(csum_ipv6_magic)
	ld	r8, 0(r3)
	ld	r9, 8(r3)
	add	r5, r5, r6
	addc	r0, r8, r9
	ld	r10, 0(r4)
	ld	r11, 8(r4)
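	/*
	 * On little endian the 16-bit lanes of the sum are in memory
	 * (byte-swapped) order, so the native-order len + proto value
	 * is rotated up one byte lane below to contribute the same
	 * bytes the big-endian pseudo-header layout would.
	 */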
#ifdef CONFIG_CPU_LITTLE_ENDIAN
	rotldi	r5, r5, 8
#endif
	adde	r0, r0, r10
	add	r5, r5, r7
	adde	r0, r0, r11
	adde	r0, r0, r5
	addze	r0, r0
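	/*
	 * Fold 64 bits to 32, then 32 to 16: after the halfword fold
	 * the 16-bit ones' complement sum sits in the upper halfword,
	 * so complement and extract that as the final __sum16.
	 */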
	rotldi	r3, r0, 32		/* fold two 32 bit halves together */
	add	r3, r0, r3
	srdi	r0, r3, 32
	rotlwi	r3, r0, 16		/* fold two 16 bit halves together */
	add	r3, r0, r3
	not	r3, r3
	rlwinm	r3, r3, 16, 16, 31
	blr
EXPORT_SYMBOL(csum_ipv6_magic)