^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* NGmemcpy.S: Niagara optimized memcpy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #ifdef __KERNEL__ /* kernel build: pull in kernel headers and pick the kernel flavour of the two helpers below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/asi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <asm/thread_info.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #define GLOBAL_SPARE %g7 /* extra scratch global; the unaligned-source path keeps its left-shift count here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #define RESTORE_ASI(TMP) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) ldub [%g6 + TI_CURRENT_DS], TMP; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) wr TMP, 0x0, %asi; /* RESTORE_ASI(TMP): reload %asi from the byte at [%g6 + TI_CURRENT_DS]; clobbers TMP. Used after the block-copy loops, which point %asi at STORE_ASI. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #else /* !__KERNEL__ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #define GLOBAL_SPARE %g5 /* same role as above, different register outside the kernel */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #define RESTORE_ASI(TMP) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) wr %g0, ASI_PNF, %asi /* RESTORE_ASI outside the kernel: set %asi to ASI_PNF; the TMP argument is not used */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #endif /* __KERNEL__ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #ifdef __sparc_v9__ /* SAVE_AMOUNT: bytes subtracted from %sp by the 'save' at function entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define SAVE_AMOUNT 128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #else /* !__sparc_v9__ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define SAVE_AMOUNT 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #endif /* __sparc_v9__ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #ifndef STORE_ASI /* may be predefined by a file that includes this one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P /* default ASI written to %asi before the block-copy loops; STORE_INIT's stxa stores through %asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #endif /* STORE_ASI */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #ifndef EX_LD /* may be predefined by a file that includes this one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define EX_LD(x,y) x /* default: emit the load x unchanged and drop the fault-handler label y */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #endif /* EX_LD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #ifndef EX_ST /* may be predefined by a file that includes this one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #define EX_ST(x,y) x /* default: emit the store x unchanged and drop the fault-handler label y */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #endif /* EX_ST */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #ifndef LOAD /* LOAD(type,addr,dest): one load (or prefetch) of kind 'type' from addr into dest */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #ifndef MEMCPY_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #define LOAD(type,addr,dest) type [addr], dest /* normal build: plain 'type [addr], dest' */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #else /* MEMCPY_DEBUG */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #define LOAD(type,addr,dest) type##a [addr] 0x80, dest /* debug build: alternate-space form ('type' with an 'a' suffix) using ASI 0x80 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #endif /* MEMCPY_DEBUG */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #endif /* LOAD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #ifndef LOAD_TWIN /* may be predefined by a file that includes this one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #define LOAD_TWIN(addr_reg,dest0,dest1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) ldda [addr_reg] ASI_BLK_INIT_QUAD_LDD_P, dest0 /* twin load used for the 16-byte steps of the block loops; only dest0 appears in the instruction - dest1 is presumably the implied partner register of the pair (confirm against the ldda definition) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #endif /* LOAD_TWIN */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #ifndef STORE /* may be predefined by a file that includes this one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #define STORE(type,src,addr) type src, [addr] /* default: plain store of kind 'type' (stb/stw/stx in this file) from src to addr */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #endif /* STORE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #ifndef STORE_INIT /* STORE_INIT(src,addr): the 8-byte store used inside the 64-byte block-copy loops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #define STORE_INIT(src,addr) stxa src, [addr] %asi /* normal build: store through whatever ASI is in %asi (set to STORE_ASI before the loops) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #else /* SIMULATE_NIAGARA_ON_NON_NIAGARA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #define STORE_INIT(src,addr) stx src, [addr + 0x00] /* simulation build: ordinary stx, %asi is not involved */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #endif /* SIMULATE_NIAGARA_ON_NON_NIAGARA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) #endif /* STORE_INIT */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) #ifndef FUNC_NAME /* may be predefined by a file that includes this one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) #define FUNC_NAME NGmemcpy /* default name of the global symbol under which the copy routine is emitted */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) #endif /* FUNC_NAME */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) #ifndef PREAMBLE /* may be predefined by a file that includes this one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) #define PREAMBLE /* default: empty; expanded as the first thing after the FUNC_NAME label, before 'save' */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) #endif /* PREAMBLE */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #ifndef XCC /* may be predefined by a file that includes this one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) #define XCC xcc /* condition-code set tested by the '%XCC' branches; defaults to xcc */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) #endif /* XCC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) .register %g2,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) .register %g3,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) #ifndef EX_RETVAL /* everything down to the matching #endif (the NG_ret_* fault handlers) is assembled only when EX_RETVAL was not predefined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) #define EX_RETVAL(x) x /* default: identity, the value is passed through unchanged */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) __restore_asi: /* Shared tail of the NG_ret_* fault handlers below; they branch here with the result already in %i0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) wr %g0, ASI_AIUS, %asi /* set %asi to ASI_AIUS before leaving (the block-copy loops point it at STORE_ASI); must come before 'ret' so that 'restore' owns the delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) ret /* return to the caller of the copy routine */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) restore /* delay slot of 'ret': pop the register window opened by 'save' at function entry, so %i0 becomes the caller's %o0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) ENTRY(NG_ret_i2_plus_i4_plus_1) /* fault fixup for the byte loop that aligns dst to 64 bytes: result = %i2 + %i4 + 1 */
add %i4, 1, %i4 /* that loop decrements %i4 before touching memory, so the faulting byte is not yet copied: count it again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) add %i2, %i4, %i0 /* delay slot: %i2 (length after alignment) + %i4 (alignment bytes left); %i5 holds nothing meaningful in that loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) ENDPROC(NG_ret_i2_plus_i4_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) ENTRY(NG_ret_i2_plus_g1) /* fault fixup for the 64-byte block loops when nothing of the current block has been stored: result = %i2 + %g1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) add %i2, %g1, %i0 /* delay slot: %i2 = tail bytes after the block loop, %g1 = bytes still to go in whole blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) ENDPROC(NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) ENTRY(NG_ret_i2_plus_g1_minus_8) /* block-loop fault fixup: result = %i2 + %g1 - 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) sub %g1, 8, %g1 /* 8 bytes of the current 64-byte block were already stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) add %i2, %g1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) ENDPROC(NG_ret_i2_plus_g1_minus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) ENTRY(NG_ret_i2_plus_g1_minus_16) /* block-loop fault fixup: result = %i2 + %g1 - 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) sub %g1, 16, %g1 /* 16 bytes of the current 64-byte block were already stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) add %i2, %g1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) ENDPROC(NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) ENTRY(NG_ret_i2_plus_g1_minus_24) /* block-loop fault fixup: result = %i2 + %g1 - 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) sub %g1, 24, %g1 /* 24 bytes of the current 64-byte block were already stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) add %i2, %g1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) ENDPROC(NG_ret_i2_plus_g1_minus_24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) ENTRY(NG_ret_i2_plus_g1_minus_32) /* block-loop fault fixup: result = %i2 + %g1 - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) sub %g1, 32, %g1 /* 32 bytes of the current 64-byte block were already stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) add %i2, %g1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) ENDPROC(NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) ENTRY(NG_ret_i2_plus_g1_minus_40) /* block-loop fault fixup: result = %i2 + %g1 - 40 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) sub %g1, 40, %g1 /* 40 bytes of the current 64-byte block were already stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) add %i2, %g1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) ENDPROC(NG_ret_i2_plus_g1_minus_40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) ENTRY(NG_ret_i2_plus_g1_minus_48) /* block-loop fault fixup: result = %i2 + %g1 - 48 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) sub %g1, 48, %g1 /* 48 bytes of the current 64-byte block were already stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) add %i2, %g1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) ENDPROC(NG_ret_i2_plus_g1_minus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) ENTRY(NG_ret_i2_plus_g1_minus_56) /* block-loop fault fixup: result = %i2 + %g1 - 56 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) sub %g1, 56, %g1 /* 56 bytes of the current 64-byte block were already stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) add %i2, %g1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) ENDPROC(NG_ret_i2_plus_g1_minus_56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) ENTRY(NG_ret_i2_plus_i4) /* fault fixup: result = %i2 + %i4. NOTE(review): the 16-byte loop that names this handler runs 'subcc %i4, 0x10, %i4' before its loads and first store, so the result looks 16 bytes too small there - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) add %i2, %i4, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) ENDPROC(NG_ret_i2_plus_i4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) ENTRY(NG_ret_i2_plus_i4_minus_8) /* fault fixup: result = %i2 + %i4 - 8. NOTE(review): used by the second store of the 16-byte loop, where %i4 was already reduced by 16 and 8 bytes are stored, so %i2 + %i4 + 8 looks like the accurate value - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) sub %i4, 8, %i4 /* account for 8 bytes already stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) add %i2, %i4, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) ENDPROC(NG_ret_i2_plus_i4_minus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) ENTRY(NG_ret_i2_plus_8) /* fault fixup for the single 8-byte tail copy, which subtracts 8 from %i2 before the access: result = %i2 + 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) add %i2, 8, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) ENDPROC(NG_ret_i2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) ENTRY(NG_ret_i2_plus_4) /* fault fixup for a 4-byte copy whose caller subtracts 4 from %i2 before the access: result = %i2 + 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) add %i2, 4, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) ENDPROC(NG_ret_i2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) ENTRY(NG_ret_i2_plus_1) /* fault fixup: result = %i2 + 1 (presumably for a byte loop that decrements %i2 before the access - confirm at the call sites) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) add %i2, 1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) ENDPROC(NG_ret_i2_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) ENTRY(NG_ret_i2_plus_g1_plus_1) /* fault fixup for the byte loop that aligns dst to 8 bytes: result = %i2 + %g1 + 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) add %g1, 1, %g1 /* that loop decrements %g1 before the access, so count the faulting byte again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) add %i2, %g1, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) ENDPROC(NG_ret_i2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) ENTRY(NG_ret_i2) /* fault fixup when %i2 itself is the outstanding byte count: result = %i2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) mov %i2, %i0 /* delay slot: executed before control reaches __restore_asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) ENDPROC(NG_ret_i2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) ENTRY(NG_ret_i2_and_7_plus_i4) /* fault fixup for the shift-and-merge 8-byte loop: result = (%i2 & 7) + %i4. NOTE(review): the store in that loop comes after 'subcc %i4, 0x8, %i4', so for store faults this looks 8 bytes too small - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) and %i2, 7, %i2 /* keep only the sub-8-byte tail of the length */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) add %i2, %i4, %i0 /* delay slot: add the whole 8-byte words still counted in %i4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) ENDPROC(NG_ret_i2_and_7_plus_i4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) .globl FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) .type FUNC_NAME,#function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) save %sp, -SAVE_AMOUNT, %sp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) srlx %i2, 31, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) cmp %g2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) tne %xcc, 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) mov %i0, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) cmp %i2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) be,pn %XCC, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) or %o0, %i1, %i3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) cmp %i2, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) blu,a,pn %XCC, 80f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) or %i3, %i2, %i3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) /* 2 blocks (128 bytes) is the minimum we can do the block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) * copy with. We need to ensure that we'll iterate at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) * once in the block copy loop. At worst we'll need to align
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) * the destination to a 64-byte boundary which can chew up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) * to (64 - 1) bytes from the length before we perform the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) * block copy loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) cmp %i2, (2 * 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) blu,pt %XCC, 70f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) andcc %i3, 0x7, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) /* %o0: dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) * %i1: src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) * %i2: len (known to be >= 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) * The block copy loops will use %i4/%i5,%g2/%g3 as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) * temporaries while copying the data.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) LOAD(prefetch, %i1, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) wr %g0, STORE_ASI, %asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) /* Align destination on 64-byte boundary. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) andcc %o0, (64 - 1), %i4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) be,pt %XCC, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) sub %i4, 64, %i4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) sub %g0, %i4, %i4 ! bytes to align dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) sub %i2, %i4, %i2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 1: subcc %i4, 1, %i4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) add %i1, 1, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) bne,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) add %o0, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) /* If the source is on a 16-byte boundary we can do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) * the direct block copy loop. If it is 8-byte aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) * we can do the 16-byte loads offset by -8 bytes and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) * init stores offset by one register.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * If the source is not even 8-byte aligned, we need to do
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) * shifting and masking (basically integer faligndata).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) * The careful bit with init stores is that if we store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) * to any part of the cache line we have to store the whole
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) * cacheline else we can end up with corrupt L2 cache line
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) * contents. Since the loop works on 64-bytes of 64-byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) * aligned store data at a time, this is easy to ensure.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) andcc %i1, (16 - 1), %i4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) andn %i2, (64 - 1), %g1 ! block copy loop iterator
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) be,pt %XCC, 50f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) sub %i2, %g1, %i2 ! final sub-block copy bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) cmp %i4, 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) be,pt %XCC, 10f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) sub %i1, %i4, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) /* Neither 8-byte nor 16-byte aligned, shift and mask. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) and %i4, 0x7, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) sll GLOBAL_SPARE, 3, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) mov 64, %i5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) sub %i5, GLOBAL_SPARE, %i5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) mov 16, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) mov 32, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) mov 48, %o7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) mov 64, %i3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) bg,pn %XCC, 9f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) #define MIX_THREE_WORDS(WORD1, WORD2, WORD3, PRE_SHIFT, POST_SHIFT, TMP) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) sllx WORD1, POST_SHIFT, WORD1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) srlx WORD2, PRE_SHIFT, TMP; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) sllx WORD2, POST_SHIFT, WORD2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) or WORD1, TMP, WORD1; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) srlx WORD3, PRE_SHIFT, TMP; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) or WORD2, TMP, WORD2; /* MIX_THREE_WORDS net effect: WORD1 = (WORD1 << POST_SHIFT) | (WORD2 >> PRE_SHIFT); WORD2 = (WORD2 << POST_SHIFT) | (WORD3 >> PRE_SHIFT). WORD3 is only read, TMP is clobbered. Callers pass POST_SHIFT = 8 * (src & 7) and PRE_SHIFT = 64 - POST_SHIFT, turning three misaligned source words into two aligned destination words. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) 8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) LOAD(prefetch, %i1 + %i3, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) add %i1, 64, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) bne,pt %XCC, 8b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) add %o0, 64, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) ba,pt %XCC, 60f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) add %i1, %i4, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) LOAD(prefetch, %i1 + %i3, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) add %i1, 64, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) bne,pt %XCC, 9b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) add %o0, 64, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) ba,pt %XCC, 60f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) add %i1, %i4, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) 10: /* Destination is 64-byte aligned, source was only 8-byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) * aligned but it has been subtracted by 8 and we perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) * one twin load ahead, then add 8 back into source when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) * we finish the loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) mov 16, %o7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) mov 32, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) mov 48, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) mov 64, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) 1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) LOAD(prefetch, %i1 + %o1, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) add %i1, 64, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) bne,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) add %o0, 64, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) ba,pt %XCC, 60f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) add %i1, 0x8, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) 50: /* Destination is 64-byte aligned, and source is 16-byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) * aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) mov 16, %o7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) mov 32, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) mov 48, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) mov 64, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) LOAD(prefetch, %i1 + %o1, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) add %i1, 64, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) bne,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) add %o0, 64, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) /* fall through */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) 60:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) membar #Sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) /* %i2 contains any final bytes still needed to be copied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) * over. If anything is left, we copy it one byte at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) RESTORE_ASI(%i3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) brz,pt %i2, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) sub %o0, %i1, %i3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) ba,a,pt %XCC, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) 70: /* 16 <= len < 128 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) bne,pn %XCC, 75f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) sub %o0, %i1, %i3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 72:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) andn %i2, 0xf, %i4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) and %i2, 0xf, %i2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) 1: subcc %i4, 0x10, %i4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) add %i1, 0x08, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) sub %i1, 0x08, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) add %i1, 0x8, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) bgu,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) add %i1, 0x8, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 73: andcc %i2, 0x8, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) sub %i2, 0x8, %i2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) add %i1, 0x8, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) 1: andcc %i2, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) sub %i2, 0x4, %i2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) add %i1, 0x4, %i1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 1: cmp %i2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) be,pt %XCC, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) ba,pt %xcc, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
/*
 * 75: copy single bytes until %o0 is 8-byte aligned, then dispatch on the
 * alignment of the source pointer %i1:
 *   - source also 8-byte aligned: go back to 72 (length >= 16) or 73;
 *   - source misaligned: go to 8 with %g1 = misalignment in bits.
 * %o0 is presumably the destination pointer and equal to %i1 + %i3 on entry
 * (both are set before this point -- TODO confirm).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) 75:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) andcc %o0, 0x7, %g1 /* %g1 = %o0 & 7; Z set when %o0 is already 8-byte aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) sub %g1, 0x8, %g1 /* %g1 = (%o0 & 7) - 8; plain sub leaves the condition codes from andcc intact */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) be,pn %icc, 2f /* already aligned: skip the byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) sub %g0, %g1, %g1 /* delay slot (runs on both paths): %g1 = 8 - (%o0 & 7) = bytes needed to align */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) sub %i2, %g1, %i2 /* take the alignment bytes off the remaining length up front */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) 1: subcc %g1, 1, %g1 /* count down alignment bytes; sets the flags tested by bgu below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) bgu,pt %icc, 1b /* loop while the decremented count is still above zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) add %i1, 1, %i1 /* delay slot: advance source on every iteration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) 2: add %i1, %i3, %o0 /* %o0 = %i1 + %i3, the address the stores above were targeting */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) andcc %i1, 0x7, %g1 /* %g1 = source misalignment in bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) bne,pt %icc, 8f /* source not 8-byte aligned: use the shift-and-merge loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) sll %g1, 3, %g1 /* delay slot: convert the misalignment from bytes to bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) cmp %i2, 16 /* source and %o0 are both 8-byte aligned here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) bgeu,pt %icc, 72b /* 16 or more bytes left: back to label 72 (defined above this point) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) nop /* branch delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) ba,a,pt %xcc, 73b /* fewer than 16 left: go to the 8/4-byte tail; ",a" annuls the delay slot, so no nop follows */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
/*
 * 8: copy loop for a source that is not 8-byte aligned.  The source pointer
 * is rounded down to an 8-byte boundary and each stored doubleword is built
 * from two consecutive aligned loads:
 *     out = (prev << off) | (next >> (64 - off))
 * where off is %g1, the source misalignment in bits (computed by the
 * "sll %g1, 3, %g1" in the delay slot of the "bne ... 8f" that leads here).
 * Stores go through %o0.  Afterwards the true source pointer and the
 * %i3 = %o0 - %i1 offset are rebuilt for the byte loop at 90.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) 8: mov 64, %i3 /* %i3 is repurposed here; its previous offset value is overwritten */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) andn %i1, 0x7, %i1 /* round the source pointer down to an 8-byte boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) sub %i3, %g1, %i3 /* %i3 = 64 - off: right-shift count for the following doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) andn %i2, 0x7, %i4 /* %i4 = length rounded down to a multiple of 8: bytes this loop handles */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) sllx %g2, %g1, %g2 /* prev << off */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) 1: add %i1, 0x8, %i1 /* step to the next aligned source doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) subcc %i4, 0x8, %i4 /* sets the flags tested by bgu below; nothing in between modifies them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) srlx %g3, %i3, %i5 /* next >> (64 - off) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) or %i5, %g2, %i5 /* merge with the shifted previous doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) add %o0, 0x8, %o0 /* advance the store pointer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) bgu,pt %icc, 1b /* loop while %i4 is still above zero after the decrement */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) sllx %g3, %g1, %g2 /* delay slot: the current doubleword becomes the shifted "prev" for the next pass */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) srl %g1, 3, %g1 /* misalignment back from bits to bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) andcc %i2, 0x7, %i2 /* remaining length = length mod 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) be,pn %icc, 85f /* nothing left: return */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) add %i1, %g1, %i1 /* delay slot (runs on both paths): re-add the byte misalignment to the rounded-down source pointer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) ba,pt %xcc, 90f /* finish with the byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) sub %o0, %i1, %i3 /* delay slot: rebuild %i3 = %o0 - %i1, the offset the byte loop stores through */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483)
/*
 * 80: short-copy path.  If either of the low two bits of %i3 is set the copy
 * is done by the byte loop at 90; otherwise it is done 4 bytes at a time.
 * %i3 on entry is computed before this point (presumably an OR of the
 * pointers and the length, so the test means "everything is 4-byte
 * aligned" -- TODO confirm).  In both cases %i3 is then replaced by
 * %o0 - %i1, the offset added to %i1 to form the store address.
 * 85: common return; also the fall-through exit of the word loop.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) 80: /* 0 < len <= 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) andcc %i3, 0x3, %g0 /* test the low two bits of %i3; result discarded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) bne,pn %XCC, 90f /* some low bit set: fall back to the byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) sub %o0, %i1, %i3 /* delay slot (runs on both paths): %i3 = %o0 - %i1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) subcc %i2, 4, %i2 /* decremented before the access, matching the "_plus_4" fixup name; sets flags for bgu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) bgu,pt %XCC, 1b /* loop while the remaining length is still above zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) add %i1, 4, %i1 /* delay slot: advance source on every iteration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) 85: ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) restore EX_RETVAL(%i0), %g0, %o0 /* delay slot of ret: restore the caller's register window and place EX_RETVAL(%i0) in its %o0; EX_RETVAL is a macro defined outside this chunk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499)
/*
 * 90: byte-at-a-time copy of the remaining %i2 bytes, then return.
 * Each byte is loaded from [%i1] and stored to [%i1 + %i3]; every branch to
 * 90 visible in this file section sets %i3 = %o0 - %i1 first or reuses the
 * offset already in use by the preceding stores.
 * NOTE(review): the loop decrements before testing, so it appears to rely
 * on %i2 being non-zero on entry -- confirm for callers outside this chunk.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) .align 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) 90:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) subcc %i2, 1, %i2 /* decremented before the access, matching the "_plus_1" fixup name; sets flags for bgu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) bgu,pt %XCC, 90b /* loop while the remaining length is still above zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) add %i1, 1, %i1 /* delay slot: advance source on every iteration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) ret
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) restore EX_RETVAL(%i0), %g0, %o0 /* delay slot of ret: same return sequence as at label 85 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) .size FUNC_NAME, .-FUNC_NAME /* record the symbol size as the distance from FUNC_NAME to this point */