^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* U3memcpy.S: UltraSparc-III optimized memcpy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
/* Build configuration. Kernel builds pull in linux/linkage.h, asm/visasm.h
 * and asm/asi.h and use %g7 as GLOBAL_SPARE. Presumably those headers supply
 * VISEntryHalf, VISExitHalf and ASI_BLK_P for that case (confirm in the
 * headers). All other builds define local equivalents below and use %g5 as
 * GLOBAL_SPARE.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #ifdef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/visasm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <asm/asi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #define GLOBAL_SPARE %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #define ASI_BLK_P 0xf0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #define FPRS_FEF 0x04
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #ifdef MEMCPY_DEBUG
/* Debug flavour of VISEntryHalf: saves %fprs in %o5 and writes FPRS_FEF to
 * %fprs like the normal flavour, and additionally clears %g1, %g2 and %g3 and
 * executes subcc %g0, %g0, %g0, which rewrites the condition codes.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
/* VISExitHalf keeps only the FPRS_FEF bit of the value saved in %o5 and
 * writes that back to %fprs. %o5 must therefore survive from entry to exit.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #else
/* Normal flavour: save %fprs in %o5, then write FPRS_FEF to %fprs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
/* Write back only the FPRS_FEF bit of the %fprs value saved in %o5. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define GLOBAL_SPARE %g5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
/* Default definitions for the hooks used by the copy routine. Each one is
 * only defined when nothing defined it earlier, so a file that includes this
 * one can presumably substitute its own versions (confirm against the
 * including files).
 *
 * EX_LD, EX_LD_FP, EX_ST, EX_ST_FP: wrap one load or store. The defaults
 * emit the instruction x unchanged and ignore y. At the call sites y is the
 * name of one of the U3_retl_* return stubs.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #ifndef EX_LD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #define EX_LD(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #ifndef EX_LD_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define EX_LD_FP(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #ifndef EX_ST
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #define EX_ST(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #ifndef EX_ST_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #define EX_ST_FP(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39)
/* LOAD expands to a plain load: type [addr], dest. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #ifndef LOAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #define LOAD(type,addr,dest) type [addr], dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43)
/* STORE expands to a plain store: type src, [addr]. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #ifndef STORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #define STORE(type,src,addr) type src, [addr]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
/* STORE_BLK expands to stda through the ASI_BLK_P address space. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #ifndef STORE_BLK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) #define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51)
/* Name of the exported copy routine, U3memcpy by default. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #ifndef FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #define FUNC_NAME U3memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
/* PREAMBLE is expanded near the top of FUNC_NAME and is empty by default. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #ifndef PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #define PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
/* Condition code set named by the %XCC branches, xcc by default. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #ifndef XCC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #define XCC xcc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) .register %g2,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) .register %g3,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) /* Special/non-trivial issues of this code:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) * 1) %o5 is preserved from VISEntryHalf to VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) * 2) Only low 32 FPU registers are used so that only the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) * lower half of the FPU register set is dirtied by this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) * code. This is especially important in the kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) * 3) This code never prefetches cachelines past the end
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) * of the source buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) #ifndef EX_RETVAL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) #define EX_RETVAL(x) x
/* Shared exit path of the *_fp return stubs: expands VISExitHalf and then
 * returns with retl. The stubs that branch here set %o0 in the delay slot
 * of their branch, so %o0 is already final when this code runs.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) __restore_fp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) nop /* delay slot of retl, nothing left to do */
/* Return stub: %o0 = %o2 + %g2 + %g1 + 1, leaving through __restore_fp.
 * %g1 and %g2 are overwritten while the sum is formed.
 * Presumably a fault fixup target for the EX_*_FP accesses (confirm against
 * the files that override those macros).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) add %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) add %g2, %g1, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) ba,pt %xcc, __restore_fp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) add %o2, %g2, %o0 /* branch delay slot: result is set before __restore_fp runs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp)
/* Return stub: %o0 = %o2 + %g2, leaving through __restore_fp.
 * Presumably a fault fixup target for the EX_*_FP accesses (confirm against
 * the files that override those macros).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) ENTRY(U3_retl_o2_plus_g2_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) ba,pt %xcc, __restore_fp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) add %o2, %g2, %o0 /* branch delay slot: result is set before __restore_fp runs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) ENDPROC(U3_retl_o2_plus_g2_fp)
/* Return stub: %o0 = %o2 + %g2 + 8, leaving through __restore_fp.
 * %g2 is overwritten. It is named at the std stores that come after %g2 has
 * already been reduced by 8 for the doubleword being written.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) ENTRY(U3_retl_o2_plus_g2_plus_8_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) add %g2, 8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) ba,pt %xcc, __restore_fp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) add %o2, %g2, %o0 /* branch delay slot: result is set before __restore_fp runs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) ENDPROC(U3_retl_o2_plus_g2_plus_8_fp)
/* Return stub: %o0 = %o2. Returns directly with retl and does not expand
 * VISExitHalf, so it belongs to accesses made outside the VIS section.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) ENTRY(U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) mov %o2, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) ENDPROC(U3_retl_o2)
/* Return stub: %o0 = %o2 + 1, returning directly with retl. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) ENTRY(U3_retl_o2_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) add %o2, 1, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) ENDPROC(U3_retl_o2_plus_1)
/* Return stub: %o0 = %o2 + 4, returning directly with retl. It is named at
 * the 4-byte tail copy, where %o2 has already been reduced by 4.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) ENTRY(U3_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) add %o2, 4, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) ENDPROC(U3_retl_o2_plus_4)
/* Return stub: %o0 = %o2 + 8, returning directly with retl. It is named at
 * the 8-byte tail copy, where %o2 has already been reduced by 8.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) ENTRY(U3_retl_o2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) add %o2, 8, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) ENDPROC(U3_retl_o2_plus_8)
/* Return stub: %o0 = %o2 + %g1 + 1, returning directly with retl.
 * %g1 is overwritten. It is named in the byte loop that decrements %g1
 * before the byte is loaded, hence the extra 1.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) ENTRY(U3_retl_o2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) add %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) add %o2, %g1, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) ENDPROC(U3_retl_o2_plus_g1_plus_1)
/* Return stub: %o0 = %o2, leaving through __restore_fp so that VISExitHalf
 * is expanded before the return.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) ENTRY(U3_retl_o2_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) ba,pt %xcc, __restore_fp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) mov %o2, %o0 /* branch delay slot: result is set before __restore_fp runs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) ENDPROC(U3_retl_o2_fp)
/* Return stub: %o0 = %o2 + (%o3 << 6) + 0x80, leaving through __restore_fp.
 * %o3 is overwritten. The shift is the 32-bit sll. In the block copy loop
 * %o3 is a count of 64-byte blocks, so the shifted term is a byte count.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) sll %o3, 6, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) add %o3, 0x80, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) ba,pt %xcc, __restore_fp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) add %o2, %o3, %o0 /* branch delay slot: result is set before __restore_fp runs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp)
/* Return stub: %o0 = %o2 + (%o3 << 6) + 0x40, leaving through __restore_fp.
 * %o3 is overwritten. The shift is the 32-bit sll. In the block copy loop
 * %o3 is a count of 64-byte blocks, so the shifted term is a byte count.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) sll %o3, 6, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) add %o3, 0x40, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) ba,pt %xcc, __restore_fp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) add %o2, %o3, %o0 /* branch delay slot: result is set before __restore_fp runs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp)
/* Return stub: %o0 = %o2 + GLOBAL_SPARE + 0x10, returning directly with retl.
 * GLOBAL_SPARE is overwritten. It is named in the 16-byte loop that reduces
 * GLOBAL_SPARE by 0x10 before the loads are issued.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) ENTRY(U3_retl_o2_plus_GS_plus_0x10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) add GLOBAL_SPARE, 0x10, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) add %o2, GLOBAL_SPARE, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) ENDPROC(U3_retl_o2_plus_GS_plus_0x10)
/* Return stub: %o0 = %o2 + GLOBAL_SPARE + 0x08, returning directly with retl.
 * GLOBAL_SPARE is overwritten. It is named at the second stx of the 16-byte
 * loop, that is after the first 8 bytes of the pair were stored.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) ENTRY(U3_retl_o2_plus_GS_plus_0x08)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) add GLOBAL_SPARE, 0x08, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) add %o2, GLOBAL_SPARE, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) ENDPROC(U3_retl_o2_plus_GS_plus_0x08)
/* Return stub: %o0 = (%o2 & 7) + GLOBAL_SPARE, returning directly with retl.
 * %o2 is overwritten with its low three bits.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) ENTRY(U3_retl_o2_and_7_plus_GS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) and %o2, 7, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) add %o2, GLOBAL_SPARE, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) ENDPROC(U3_retl_o2_and_7_plus_GS)
/* Return stub: %o0 = (%o2 & 7) + GLOBAL_SPARE + 8, returning directly with
 * retl. %o2 and GLOBAL_SPARE are both overwritten. It is named at the stx
 * that follows the subtraction of 8 from GLOBAL_SPARE.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) ENTRY(U3_retl_o2_and_7_plus_GS_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) add GLOBAL_SPARE, 8, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) and %o2, 7, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) add %o2, GLOBAL_SPARE, %o0 /* delay slot of retl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) /* The cheetah's flexible spine, oversized liver, enlarged heart,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) * slender muscular body, and claws make it the swiftest hunter
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) * in Africa and the fastest animal on land. Can reach speeds
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) * of up to 2.4GB per second.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) .globl FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) .type FUNC_NAME,#function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) srlx %o2, 31, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) cmp %g2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) /* software trap 5 "Range Check" if len >= 0x80000000 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) tne %xcc, 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) mov %o0, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) /* if len == 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) cmp %o2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) be,pn %XCC, end_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) or %o0, %o1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) /* if len < 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) cmp %o2, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) blu,a,pn %XCC, less_than_16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) or %o3, %o2, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) /* if len < 192 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) cmp %o2, (3 * 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) blu,pt %XCC, less_than_192
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) andcc %o3, 0x7, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) * o5 from here until we hit VISExitHalf.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) VISEntryHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) /* Is 'dst' already aligned on a 64-byte boundary? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) andcc %o0, 0x3f, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) be,pt %XCC, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) * of bytes to copy to make 'dst' 64-byte aligned. We pre-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) * subtract this from 'len'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) sub %o0, %o1, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) sub %g2, 0x40, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) sub %g0, %g2, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) sub %o2, %g2, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) andcc %g2, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) be,pt %icc, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) and %g2, 0x38, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 1: subcc %g1, 0x1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) bgu,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) add %o1, 0x1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) add %o1, GLOBAL_SPARE, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 2: cmp %g2, 0x0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) and %o1, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) be,pt %icc, 3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) alignaddr %o1, %g0, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) subcc %g2, 0x8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) faligndata %f4, %f6, %f0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) be,pn %icc, 3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) subcc %g2, 0x8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) faligndata %f6, %f4, %f2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243)
/* Set-up for the 64-byte block copy. Issues the initial prefetches, loads
 * the first source block into %f0-%f14 (plus the first doubleword of the
 * next block into %f0) and pre-aligns it into %f16-%f26. GLOBAL_SPARE
 * receives the remaining length rounded down to a multiple of 64.
 * Nothing has been stored by this section yet, so every load here names
 * U3_retl_o2 and %o2 must still hold the full remaining length for them.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 3: LOAD(prefetch, %o1 + 0x000, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) LOAD(prefetch, %o1 + 0x040, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) andn %o2, (0x40 - 1), GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) LOAD(prefetch, %o1 + 0x080, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) LOAD(prefetch, %o1 + 0x0c0, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) LOAD(prefetch, %o1 + 0x100, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) LOAD(prefetch, %o1 + 0x140, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) LOAD(prefetch, %o1 + 0x180, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) LOAD(prefetch, %o1 + 0x1c0, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) faligndata %f0, %f2, %f16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) faligndata %f2, %f4, %f18
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) faligndata %f4, %f6, %f20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) faligndata %f6, %f8, %f22
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) faligndata %f8, %f10, %f24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) faligndata %f10, %f12, %f26
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269)
/* Reduce %o2 to the sub-block tail (len modulo 64) before the first block
 * is stored. The U3_retl_o2_plus_o3_sll_6_plus_0x80 and _0x40 stubs named
 * from here on return %o2 + (%o3 << 6) + 0x80 or 0x40, where the %o3 term
 * already accounts for the whole 64-byte blocks. If %o2 still held the full
 * length those blocks would be counted twice and the value returned after a
 * fault would exceed the bytes actually left.
 * %o2 is not read again until the tail code, which applies the same mask,
 * so the non-faulting copy is unchanged. The plain and does not touch the
 * condition codes.
 */
and %o2, 0x3f, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) add %o1, 0x40, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) bgu,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) srl GLOBAL_SPARE, 6, %o3 /* delay slot, runs on both paths: %o3 = blocks left for the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) ba,pt %xcc, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) faligndata %f12, %f14, %f28
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) faligndata %f14, %f0, %f30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) faligndata %f0, %f2, %f16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) add %o0, 0x40, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) faligndata %f2, %f4, %f18
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) faligndata %f4, %f6, %f20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) subcc %o3, 0x01, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) faligndata %f6, %f8, %f22
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) faligndata %f8, %f10, %f24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) LOAD(prefetch, %o1 + 0x1c0, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) faligndata %f10, %f12, %f26
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) bg,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) add %o1, 0x40, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) /* Finally we copy the last full 64-byte block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) faligndata %f12, %f14, %f28
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) faligndata %f14, %f0, %f30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) faligndata %f0, %f2, %f16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) faligndata %f2, %f4, %f18
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) faligndata %f4, %f6, %f20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) faligndata %f6, %f8, %f22
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) faligndata %f8, %f10, %f24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) cmp %g1, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) add %o0, 0x40, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) 1: faligndata %f10, %f12, %f26
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) faligndata %f12, %f14, %f28
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) faligndata %f14, %f0, %f30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) add %o0, 0x40, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) add %o1, 0x40, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) membar #Sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) /* Now we copy the (len modulo 64) bytes at the end.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) * Note how we borrow the %f0 loaded above.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) * Also notice how this code is careful not to perform a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) * load past the end of the src buffer.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) and %o2, 0x3f, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) andcc %o2, 0x38, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) be,pn %XCC, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) subcc %g2, 0x8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) be,pn %XCC, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) cmp %g1, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) sub %o2, %g2, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) be,a,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) 1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) subcc %g2, 0x8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) faligndata %f0, %f2, %f8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) be,pn %XCC, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) subcc %g2, 0x8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) faligndata %f2, %f0, %f8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) bne,pn %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) /* If anything is left, we copy it one byte at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) * Note that %g1 is (src & 0x7) saved above before the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) * alignaddr was performed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) cmp %o2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) add %o1, %g1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) be,pn %XCC, end_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) sub %o0, %o1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) andcc %g1, 0x7, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) bne,pn %icc, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) andcc %o2, 0x8, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) be,pt %icc, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) sub %o2, 8, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 1: andcc %o2, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) be,pt %icc, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) add %o1, 0x4, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) sub %o2, 4, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) 1: andcc %o2, 0x2, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) be,pt %icc, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) add %o1, 0x2, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) sub %o2, 2, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) 1: andcc %o2, 0x1, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) be,pt %icc, end_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) ba,pt %xcc, end_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) /* 16 <= len < 192 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) less_than_192:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) bne,pn %XCC, 75f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) sub %o0, %o1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 72:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) andn %o2, 0xf, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) and %o2, 0xf, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) bgu,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) 73: andcc %o2, 0x8, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) sub %o2, 0x8, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 1: andcc %o2, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) sub %o2, 0x4, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) add %o1, 0x4, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) 1: cmp %o2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) be,pt %XCC, end_return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) ba,pt %xcc, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445)
/* 75: copy the 8 - (%o0 & 7) bytes needed to bring the address held in
 * %o0 up to an 8-byte boundary, one byte at a time, and subtract them
 * from %o2. When %o0 is already aligned the byte loop and the %o2
 * adjustment are skipped. At 2: %o0 is reloaded with the current store
 * address (%o1 + %o3); if %o1 is 8-byte aligned as well, control goes
 * back to the doubleword code at 72: or 73:, otherwise it continues at
 * 8: with %g1 = (%o1 & 7) * 8.
 * NOTE(review): this relies on %o3 = %o0 - %o1 on entry so that the
 * stores at %o1 + %o3 land at %o0; that setup is outside this view -
 * confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) 75:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) andcc %o0, 0x7, %g1 /* %g1 = %o0 & 7; sets the flags used by be below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) sub %g1, 0x8, %g1 /* %g1 = (%o0 & 7) - 8; plain sub leaves the flags alone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) be,pn %icc, 2f /* %o0 already 8-byte aligned: skip the byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) sub %g0, %g1, %g1 /* delay slot: %g1 = 8 - (%o0 & 7) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) sub %o2, %g1, %o2 /* take the alignment bytes off %o2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 1: subcc %g1, 1, %g1 /* one byte per iteration */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) bgu,pt %icc, 1b /* loop while the decremented %g1 is above zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) add %o1, 1, %o1 /* delay slot: advance %o1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) 2: add %o1, %o3, %o0 /* %o0 = current store address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) andcc %o1, 0x7, %g1 /* %g1 = %o1 & 7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) bne,pt %icc, 8f /* %o1 not 8-byte aligned: shift-and-merge path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) sll %g1, 3, %g1 /* delay slot: misalignment of %o1 in bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) cmp %o2, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) bgeu,pt %icc, 72b /* at least 16 left: back to 72: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) nop /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) ba,a,pt %xcc, 73b /* fewer than 16 left: finish at 73:; ",a" annuls the delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468)
/* 8: %o1 is not 8-byte aligned. Round %o1 down to an 8-byte boundary,
 * read aligned doublewords from there and build each 8-byte store to
 * %o0 out of two neighbouring loads: the earlier one shifted left by
 * %g1 bits, OR-ed with the later one shifted right by 64 - %g1 bits.
 * %g1 is the misalignment of %o1 in bits (set in the delay slot of the
 * branch to this label). GLOBAL_SPARE counts the bytes this loop still
 * has to store (%o2 rounded down to a multiple of 8). Afterwards %o1 is
 * moved forward by the byte misalignment again and the %o2 & 7 leftover,
 * if any, goes to the byte loop at 90:.
 * NOTE(review): the loop body runs before its count is tested, so this
 * path appears to assume %o2 >= 8 on entry - confirm against the code
 * that reaches 75: above.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) 8: mov 64, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) andn %o1, 0x7, %o1 /* round %o1 down to an 8-byte boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) /* first aligned doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) sub %o3, %g1, %o3 /* %o3 = 64 - %g1: right-shift amount */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) andn %o2, 0x7, GLOBAL_SPARE /* bytes to move as whole doublewords */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) sllx %g2, %g1, %g2 /* shift the first doubleword left by %g1 bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) /* next aligned doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE /* sets the flags tested by bgu below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) srlx %g3, %o3, %o5 /* part of the new doubleword that completes this store */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) or %o5, %g2, %o5 /* merge with the carried-over part */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) bgu,pt %icc, 1b /* loop while GLOBAL_SPARE is still above zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) sllx %g3, %g1, %g2 /* delay slot: carry the rest of %g3 into the next store */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) srl %g1, 3, %g1 /* misalignment back from bits to bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) andcc %o2, 0x7, %o2 /* %o2 = leftover byte count (0..7) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) be,pn %icc, end_return /* no leftover: return */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) add %o1, %g1, %o1 /* delay slot: re-add the byte misalignment removed by the andn above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) ba,pt %xcc, 90f /* leftover bytes: byte loop at 90: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) sub %o0, %o1, %o3 /* delay slot: %o3 = %o0 - %o1, so 90: stores at %o1 + %o3 = %o0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) /* 0 < len < 16 */
/* If either of the low two bits of %o3 is set, defer to the byte loop
 * at 90:. Otherwise copy 4 bytes per iteration from %o1 to %o1 + %o3
 * until %o2 is used up, then fall through into end_return.
 * NOTE(review): the value of %o3 tested on entry is computed before
 * this view; presumably it folds together both pointers and the length,
 * so that clear low bits mean 4-byte alignment and a length that is a
 * multiple of 4 - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) less_than_16:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) andcc %o3, 0x3, %g0 /* test the low two bits of %o3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) bne,pn %XCC, 90f /* any set: byte loop at 90: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) sub %o0, %o1, %o3 /* delay slot (runs on both paths): %o3 = %o0 - %o1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) subcc %o2, 4, %o2 /* 4 bytes per iteration; sets the flags for bgu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) /* %o1 + %o3 = store address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) bgu,pt %XCC, 1b /* loop while %o2 is still above zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) add %o1, 4, %o1 /* delay slot: advance %o1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505)
/* Common exit: return to the caller with %o0 = EX_RETVAL(%o4).
 * NOTE(review): EX_RETVAL and the value kept in %o4 are defined outside
 * this view; presumably %o4 holds the caller's original destination
 * pointer - confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) end_return:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) mov EX_RETVAL(%o4), %o0 /* delay slot: set the return value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) .align 32
/* 90: byte-at-a-time copy of %o2 bytes from %o1 to %o1 + %o3, followed
 * by the same return sequence as end_return. One byte is moved before
 * the count is tested, so %o2 must be non-zero on entry; the branches to
 * this label visible in this part of the file are taken only after a
 * non-zero check on %o2 or on a path commented as len > 0.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) 90:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) subcc %o2, 1, %o2 /* one byte per iteration; sets the flags for bgu */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) /* %o1 + %o3 = store address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) bgu,pt %XCC, 90b /* loop while %o2 is still above zero */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) add %o1, 1, %o1 /* delay slot: advance %o1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) mov EX_RETVAL(%o4), %o0 /* delay slot: set the return value */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) .size FUNC_NAME, .-FUNC_NAME