/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		IP/TCP/UDP checksumming routines
 *
 * Xtensa version:  Copyright (C) 2001 Tensilica, Inc. by Kevin Chea
 *                  Optimized by Joe Taylor
 */

#include <linux/errno.h>
#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * computes a partial checksum, e.g. for TCP/UDP fragments
 */

/*
 * unsigned int csum_partial(const unsigned char *buf, int len,
 *			     unsigned int sum);
 *    a2 = buf
 *    a3 = len
 *    a4 = sum
 *
 * This function is optimized for 2- and 4-byte aligned buffers; an odd
 * starting address still works, but only via a slow byte-reassembly path.
 */
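
/*
 * Note that the returned value is only a 32-bit partial sum: calls can be
 * chained over successive fragments, and the caller folds the final result
 * down to the 16-bit Internet checksum.  For reference, a sketch of what
 * the kernel's generic csum_fold() does with this value (not code from
 * this file):
 *
 *	sum = (sum & 0xffff) + (sum >> 16);	// fold carries into low half
 *	sum = (sum & 0xffff) + (sum >> 16);	// once more for the last carry
 *	return ~sum;				// ones-complement result
 */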

/* ONES_ADD converts twos-complement math to ones-complement. */
#define ONES_ADD(sum, val)	  \
	add	sum, sum, val	; \
	bgeu	sum, val, 99f	; \
	addi	sum, sum, 1	; \
99:				;
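
/*
 * In C terms, ONES_ADD is the classic end-around-carry add (a minimal
 * sketch, assuming 32-bit unsigned arithmetic):
 *
 *	sum += val;
 *	if (sum < val)		// the 32-bit add wrapped around
 *		sum += 1;	// feed the carry bit back into the sum
 *
 * The bgeu above is exactly the "if (sum < val)" test: after the add, an
 * unsigned result smaller than one of the operands means a carry out.
 */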

.text
ENTRY(csum_partial)

	/*
	 * Experiments with Ethernet and SLIP connections show that buf
	 * is aligned on either a 2-byte or 4-byte boundary.
	 */
	abi_entry_default
	extui	a5, a2, 0, 2
	bnez	a5, 8f		/* branch if not 4-byte aligned */
	/* Fall through in the common case: buf is 4-byte aligned */
1:
	srli	a5, a3, 5	/* 32-byte chunks */
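	/*
	 * Loop pattern used throughout this file: on cores with the Xtensa
	 * loop option (XCHAL_HAVE_LOOPS), loopgtz sets up a zero-overhead
	 * hardware loop that runs the following block a5 times, skipping
	 * it entirely if the count is zero.  Otherwise we fall back to
	 * computing an end pointer by hand and closing the loop with an
	 * explicit blt.
	 */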
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 5
	add	a5, a5, a2	/* a5 = end of last 32-byte chunk */
.Loop1:
#endif
	l32i	a6, a2, 0
	l32i	a7, a2, 4
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 8
	l32i	a7, a2, 12
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 16
	l32i	a7, a2, 20
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	l32i	a6, a2, 24
	l32i	a7, a2, 28
	ONES_ADD(a4, a6)
	ONES_ADD(a4, a7)
	addi	a2, a2, 4*8
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop1
#endif
2:
	extui	a5, a3, 2, 3	/* remaining 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 3f
#else
	beqz	a5, 3f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop2:
#endif
	l32i	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop2
#endif
3:
	_bbci.l	a3, 1, 5f	/* remaining 2-byte chunk */
	l16ui	a6, a2, 0
	ONES_ADD(a4, a6)
	addi	a2, a2, 2
5:
	_bbci.l	a3, 0, 7f	/* remaining 1-byte chunk */
6:	l8ui	a6, a2, 0
#ifdef __XTENSA_EB__
	slli	a6, a6, 8	/* load byte into bits 8..15 */
#endif
	ONES_ADD(a4, a6)
7:
	mov	a2, a4
	abi_ret_default

	/* uncommon case, buf is 2-byte aligned */
8:
	beqz	a3, 7b		/* branch if len == 0 */
	beqi	a3, 1, 6b	/* branch if len == 1 */

	extui	a5, a2, 0, 1
	bnez	a5, 8f		/* branch if the address is odd */

	l16ui	a6, a2, 0	/* common case, len >= 2 */
	ONES_ADD(a4, a6)
	addi	a2, a2, 2	/* adjust buf */
	addi	a3, a3, -2	/* adjust len */
	j	1b		/* now buf is 4-byte aligned */

	/* case: odd-byte aligned, len > 1
	 * This case is dog slow, so don't give us an odd address.
	 * (I don't think this ever happens, but just in case.)
	 */
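	/*
	 * Each misaligned word is reassembled from one byte, one aligned
	 * halfword, and one more byte.  In C terms, for a little-endian
	 * core (a sketch only; on big-endian the first byte takes the
	 * high shift instead, as the #ifdef below selects):
	 *
	 *	unsigned int v = p[0]				// bits 0..7
	 *	    | ((unsigned int)*(unsigned short *)(p + 1) << 8)
	 *	    | ((unsigned int)p[3] << 24);		// bits 24..31
	 *
	 * The halfword load at p + 1 is legal because p itself is odd,
	 * so p + 1 is 2-byte aligned.
	 */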
8:
	srli	a5, a3, 2	/* 4-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a5, 2f
#else
	beqz	a5, 2f
	slli	a5, a5, 2
	add	a5, a5, a2	/* a5 = end of last 4-byte chunk */
.Loop3:
#endif
	l8ui	a6, a2, 0	/* bits 24..31 */
	l16ui	a7, a2, 1	/* bits  8..23 */
	l8ui	a8, a2, 3	/* bits  0..7  */
#ifdef __XTENSA_EB__
	slli	a6, a6, 24
#else
	slli	a8, a8, 24
#endif
	slli	a7, a7, 8
	or	a7, a7, a6
	or	a7, a7, a8
	ONES_ADD(a4, a7)
	addi	a2, a2, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a5, .Loop3
#endif
2:
	_bbci.l	a3, 1, 3f	/* remaining 2-byte chunk, still odd addr */
	l8ui	a6, a2, 0
	l8ui	a7, a2, 1
#ifdef __XTENSA_EB__
	slli	a6, a6, 8
#else
	slli	a7, a7, 8
#endif
	or	a7, a7, a6
	ONES_ADD(a4, a7)
	addi	a2, a2, 2
3:
	j	5b		/* branch to handle the remaining byte */

ENDPROC(csum_partial)

/*
 * Copy while checksumming, otherwise like csum_partial
 */

/*
unsigned int csum_partial_copy_generic(const char *src, char *dst, int len)
	a2 = src
	a3 = dst
	a4 = len
	a5 = sum
	a8 = temp
	a9 = temp
	a10 = temp

    This function is optimized for 4-byte aligned addresses.  Other
    alignments work, but not nearly as efficiently.
 */
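
/*
 * Conceptually, ignoring the fault handling that the EX() annotations
 * provide, the function behaves like this C sketch (an illustration only,
 * not the kernel's implementation; it reuses csum_partial from above and
 * assumes memcpy is available):
 *
 *	unsigned int csum_partial_copy_sketch(const char *src, char *dst,
 *					      int len)
 *	{
 *		memcpy(dst, src, len);
 *		return csum_partial(dst, len, ~0u);
 *	}
 *
 * The sum is seeded with -1 rather than 0 so that a successful return
 * value can never be 0; 0 is reserved for the fault case below.
 */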

ENTRY(csum_partial_copy_generic)

	abi_entry_default
	movi	a5, -1
	or	a10, a2, a3

	/* We optimize the following alignment tests for the 4-byte
	   aligned case.  Two bbsi.l instructions might seem more optimal
	   (commented out below).  However, both labels 5: and 3: are out
	   of the imm8 range, so the assembler relaxes them into
	   equivalent bbci.l, j combinations, which is actually
	   slower. */

	extui	a9, a10, 0, 2
	beqz	a9, 1f		/* branch if both are 4-byte aligned */
	bbsi.l	a10, 0, 5f	/* branch if one address is odd */
	j	3f		/* one address is 2-byte aligned */

/*	_bbsi.l	a10, 0, 5f */	/* branch if odd address */
/*	_bbsi.l	a10, 1, 3f */	/* branch if 2-byte-aligned address */

1:
	/* src and dst are both 4-byte aligned */
	srli	a10, a4, 5	/* 32-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 2f
#else
	beqz	a10, 2f
	slli	a10, a10, 5
	add	a10, a10, a2	/* a10 = end of last 32-byte src chunk */
.Loop5:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	l32i	a8, a2, 4
EX(10f)	s32i	a9, a3, 0
EX(10f)	s32i	a8, a3, 4
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 8
EX(10f)	l32i	a8, a2, 12
EX(10f)	s32i	a9, a3, 8
EX(10f)	s32i	a8, a3, 12
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 16
EX(10f)	l32i	a8, a2, 20
EX(10f)	s32i	a9, a3, 16
EX(10f)	s32i	a8, a3, 20
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
EX(10f)	l32i	a9, a2, 24
EX(10f)	l32i	a8, a2, 28
EX(10f)	s32i	a9, a3, 24
EX(10f)	s32i	a8, a3, 28
	ONES_ADD(a5, a9)
	ONES_ADD(a5, a8)
	addi	a2, a2, 32
	addi	a3, a3, 32
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop5
#endif
2:
	extui	a10, a4, 2, 3	/* remaining 4-byte chunks */
	extui	a4, a4, 0, 2	/* reset len for general-case, 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 3f
#else
	beqz	a10, 3f
	slli	a10, a10, 2
	add	a10, a10, a2	/* a10 = end of last 4-byte src chunk */
.Loop6:
#endif
EX(10f)	l32i	a9, a2, 0
EX(10f)	s32i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 4
	addi	a3, a3, 4
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop6
#endif
3:
	/*
	   Control reaches this point in two cases: (1) fall-through from
	   the 4-byte-aligned code above, to process at most one 2-byte
	   chunk; (2) a branch from the alignment tests when either src
	   or dst is 2-byte aligned, in which case all remaining bytes
	   are processed here, except perhaps a trailing odd byte.  This
	   path is inefficient, so align your addresses to 4-byte
	   boundaries.

	   a2 = src
	   a3 = dst
	   a4 = len
	   a5 = sum
	*/
	srli	a10, a4, 1	/* 2-byte chunks */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 4f
#else
	beqz	a10, 4f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last 2-byte src chunk */
.Loop7:
#endif
EX(10f)	l16ui	a9, a2, 0
EX(10f)	s16i	a9, a3, 0
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop7
#endif
4:
	/* This section processes a possible trailing odd byte. */
	_bbci.l	a4, 0, 8f	/* 1-byte chunk */
EX(10f)	l8ui	a9, a2, 0
EX(10f)	s8i	a9, a3, 0
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* shift byte to bits 8..15 */
#endif
	ONES_ADD(a5, a9)
8:
	mov	a2, a5
	abi_ret_default

5:
	/* Control branches to here when either src or dst is odd.  All
	   bytes are processed with 8-bit accesses.  Grossly inefficient,
	   so don't feed us an odd address. */

	srli	a10, a4, 1	/* handle in pairs for 16-bit csum */
#if XCHAL_HAVE_LOOPS
	loopgtz	a10, 6f
#else
	beqz	a10, 6f
	slli	a10, a10, 1
	add	a10, a10, a2	/* a10 = end of last odd-aligned, 2-byte src chunk */
.Loop8:
#endif
EX(10f)	l8ui	a9, a2, 0
EX(10f)	l8ui	a8, a2, 1
EX(10f)	s8i	a9, a3, 0
EX(10f)	s8i	a8, a3, 1
#ifdef __XTENSA_EB__
	slli	a9, a9, 8	/* combine into a single 16-bit value */
#else				/* for checksum computation */
	slli	a8, a8, 8
#endif
	or	a9, a9, a8
	ONES_ADD(a5, a9)
	addi	a2, a2, 2
	addi	a3, a3, 2
#if !XCHAL_HAVE_LOOPS
	blt	a2, a10, .Loop8
#endif
6:
	j	4b		/* process the possible trailing odd byte */

ENDPROC(csum_partial_copy_generic)


# Exception handler:
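# Every user-memory load/store above is wrapped in EX(10f): the EX() fixup
# macro records that instruction in the kernel exception table with 10:
# below as its handler.  If such an access faults, the kernel branches
# here instead of oopsing, and the function returns 0.  Because the
# running sum was seeded with -1, a successful call never returns 0, so
# callers can use a zero return to detect a fault.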
.section .fixup, "ax"
10:
	movi	a2, 0
	abi_ret_default

.previous