/* SPDX-License-Identifier: GPL-2.0 */
/* checksum.S: Sparc V9 optimized checksum code.
 *
 *  Copyright(C) 1995 Linus Torvalds
 *  Copyright(C) 1995 Miguel de Icaza
 *  Copyright(C) 1996, 2000 David S. Miller
 *  Copyright(C) 1997 Jakub Jelinek
 *
 * derived from:
 *	Linux/Alpha checksum c-code
 *	Linux/ix86 inline checksum assembly
 *	RFC1071 Computing the Internet Checksum (esp. Jacobson's m68k code)
 *	David Mosberger-Tang for optimized reference c-code
 *	BSD4.4 portable checksum routine
 */

#include <asm/export.h>
	.text

csum_partial_fix_alignment:
	/* We checked for zero length already, so there must be
	 * at least one byte.
	 */
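	/* %icc here still reflects the "andcc %o0, 0x1, %g7" executed
	 * in the branch delay slot at the call site below: "be" means
	 * the starting address is even, so no leading odd byte needs
	 * to be consumed.
	 */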
	be,pt		%icc, 1f
	 nop
	ldub		[%o0 + 0x00], %o4
	add		%o0, 1, %o0
	sub		%o1, 1, %o1
1:	andcc		%o0, 0x2, %g0
	be,pn		%icc, csum_partial_post_align
	 cmp		%o1, 2
	blu,pn		%icc, csum_partial_end_cruft
	 nop
	lduh		[%o0 + 0x00], %o5
	add		%o0, 2, %o0
	sub		%o1, 2, %o1
	ba,pt		%xcc, csum_partial_post_align
	 add		%o5, %o4, %o4

	.align		32
	.globl		csum_partial
	.type		csum_partial,#function
EXPORT_SYMBOL(csum_partial)
csum_partial:	/* %o0=buff, %o1=len, %o2=sum */
	prefetch	[%o0 + 0x000], #n_reads
	clr		%o4
	prefetch	[%o0 + 0x040], #n_reads
	brz,pn		%o1, csum_partial_finish
	 andcc		%o0, 0x3, %g0

	/* We "remember" in %g7 whether the lowest bit of the
	 * address was set, because if it was we must swap the
	 * upper and lower 8-bit halves of the sum we calculate.
	 */
	bne,pn		%icc, csum_partial_fix_alignment
	 andcc		%o0, 0x1, %g7
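
	/* Per RFC 1071 the ones-complement sum is "byte order
	 * independent": summing a stream whose bytes sit in swapped
	 * halfword positions yields the byte-swap of the correct sum.
	 * So a stray leading byte is accounted for by one swap of the
	 * final 16-bit result.
	 */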

csum_partial_post_align:
	prefetch	[%o0 + 0x080], #n_reads
	andncc		%o1, 0x3f, %o3

	prefetch	[%o0 + 0x0c0], #n_reads
	sub		%o1, %o3, %o1
	brz,pn		%o3, 2f
	 prefetch	[%o0 + 0x100], #n_reads

	/* So that we don't need the non-pairing add-with-carry
	 * instructions, we accumulate the 32-bit values into a
	 * 64-bit register.  After the loop we fold it down to
	 * 32 bits and then to 16 bits.
	 */
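	/* Each iteration adds sixteen 32-bit words into %o4, so the
	 * 64-bit accumulator has ~32 bits of headroom and cannot
	 * overflow for any length expressible in %o1.
	 */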
	prefetch	[%o0 + 0x140], #n_reads
1:	lduw		[%o0 + 0x00], %o5
	lduw		[%o0 + 0x04], %g1
	lduw		[%o0 + 0x08], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x0c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x10], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x14], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x18], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x1c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x20], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x24], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x28], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x2c], %g3
	add		%o4, %g1, %o4
	lduw		[%o0 + 0x30], %o5
	add		%o4, %g2, %o4
	lduw		[%o0 + 0x34], %g1
	add		%o4, %g3, %o4
	lduw		[%o0 + 0x38], %g2
	add		%o4, %o5, %o4
	lduw		[%o0 + 0x3c], %g3
	add		%o4, %g1, %o4
	prefetch	[%o0 + 0x180], #n_reads
	add		%o4, %g2, %o4
	subcc		%o3, 0x40, %o3
	add		%o0, 0x40, %o0
	bne,pt		%icc, 1b
	 add		%o4, %g3, %o4

2:	and		%o1, 0x3c, %o3
	brz,pn		%o3, 2f
	 sub		%o1, %o3, %o1
1:	lduw		[%o0 + 0x00], %o5
	subcc		%o3, 0x4, %o3
	add		%o0, 0x4, %o0
	bne,pt		%icc, 1b
	 add		%o4, %o5, %o4

2:
	/* fold 64-->32 */
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
	srlx		%o4, 32, %o5
	srl		%o4, 0, %o4
	add		%o4, %o5, %o4
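
	/* Two rounds are needed: the first 32+32 add can itself carry
	 * into bit 32, and the second round folds that single carry
	 * bit back in, leaving a true 32-bit sum in %o4.
	 */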

	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
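
	/* As with the 64-->32 fold, two passes: afterwards %o4 fits
	 * in 16 bits plus at most one carry bit, which the final
	 * end-around-carry add at the bottom absorbs.
	 */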

csum_partial_end_cruft:
	/* %o4 has the 16-bit sum we have calculated so far. */
	cmp		%o1, 2
	blu,pt		%icc, 1f
	 nop
	lduh		[%o0 + 0x00], %o5
	sub		%o1, 2, %o1
	add		%o0, 2, %o0
	add		%o4, %o5, %o4
1:	brz,pt		%o1, 1f
	 nop
	ldub		[%o0 + 0x00], %o5
	sub		%o1, 1, %o1
	add		%o0, 1, %o0
	sllx		%o5, 8, %o5
	add		%o4, %o5, %o4
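
	/* The CPU is big-endian, so a trailing odd byte occupies the
	 * high half of its 16-bit word; RFC 1071 pads it with a zero
	 * byte on the right, hence the shift left by eight above.
	 */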
1:
	/* fold 32-->16 */
	sethi		%hi(0xffff0000), %g1
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4
	srl		%o4, 16, %o5
	andn		%o4, %g1, %g2
	add		%o5, %g2, %o4

1:	brz,pt		%g7, 1f
	 nop

	/* We started with an odd byte, byte-swap the result. */
	srl		%o4, 8, %o5
	and		%o4, 0xff, %g1
	sll		%g1, 8, %g1
	or		%o5, %g1, %o4

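	/* Fold the computed sum into the caller's 32-bit seed with an
	 * end-around carry: addcc does the add, and addc adds any
	 * carry out of bit 31 back into %o2.
	 */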
1:	addcc		%o2, %o4, %o2
	addc		%g0, %o2, %o2

csum_partial_finish:
	retl
	 srl		%o2, 0, %o0