^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* U1memcpy.S: UltraSPARC-I/II/IIi/IIe optimized memcpy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 1997, 2004 David S. Miller (davem@redhat.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 1996, 1997, 1998, 1999 Jakub Jelinek (jj@ultra.linux.cz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
/* Build-environment setup.
 * With __KERNEL__ defined, the headers below are included and %g7 is
 * used as GLOBAL_SPARE.  Otherwise %g5 is used as GLOBAL_SPARE and
 * ASI_BLK_P, FPRS_FEF, VISEntry and VISExit are defined locally.
 * NOTE(review): in the __KERNEL__ case VISEntry/VISExit are presumably
 * supplied by asm/visasm.h -- confirm against that header.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #ifdef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <asm/visasm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <asm/asi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #define GLOBAL_SPARE g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #define GLOBAL_SPARE g5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #define ASI_BLK_P 0xf0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #define FPRS_FEF 0x04
/* Local VISEntry saves %fprs in %o5 and then writes FPRS_FEF to %fprs.
 * Local VISExit masks the saved %o5 with FPRS_FEF and writes the result
 * back to %fprs, so %o5 must stay intact between the two.
 * The MEMCPY_DEBUG flavour of VISEntry additionally clears %g1, %g2 and
 * %g3 and executes subcc %g0, %g0, %g0 (which rewrites the condition
 * codes).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #ifdef MEMCPY_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) clr %g1; clr %g2; clr %g3; subcc %g0, %g0, %g0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define VISEntry rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #define VISExit and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
/* Default load wrappers, used only when the including file has not
 * already defined them.  Both expand to the bare instruction x and
 * discard y; at the call sites in this file y is the name of a U1_*
 * fixup stub.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #ifndef EX_LD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define EX_LD(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #ifndef EX_LD_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define EX_LD_FP(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34)
/* Default store wrappers, used only when the including file has not
 * already defined them.  Both expand to the bare instruction x and
 * discard y; at the call sites in this file y is the name of a U1_*
 * fixup stub.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #ifndef EX_ST
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #define EX_ST(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #ifndef EX_ST_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #define EX_ST_FP(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
/* Default LOAD: emit the load mnemonic 'type' reading [addr] into dest.
 * An including file may pre-define LOAD to substitute another form.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #ifndef LOAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #define LOAD(type,addr,dest) type [addr], dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45)
/* Default LOAD_BLK: ldda from [addr] through ASI_BLK_P into the FP
 * register group starting at dest.  An including file may pre-define
 * it.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #ifndef LOAD_BLK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_P, dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
/* Default STORE: emit the store mnemonic 'type' writing src to [addr].
 * An including file may pre-define STORE to substitute another form.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #ifndef STORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #define STORE(type,src,addr) type src, [addr]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
/* Default STORE_BLK: stda of the FP register group starting at src to
 * [addr] through ASI_BLK_P.  An including file may pre-define it.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #ifndef STORE_BLK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
/* Name of the exported routine; defaults to memcpy unless the including
 * file has already chosen another symbol name.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #ifndef FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #define FUNC_NAME memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
/* PREAMBLE is expanded near the top of FUNC_NAME, after the length
 * sanity trap.  It defaults to nothing; an including file may
 * pre-define it to inject instructions there.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) #ifndef PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) #define PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
/* Condition-code set named by the %XCC branches in FUNC_NAME; defaults
 * to xcc.  Branches written with a literal %xcc or %icc are not
 * affected by this macro.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) #ifndef XCC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) #define XCC xcc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
/* FREG_FROB(f1..f9): run faligndata over the eight adjacent register
 * pairs (f1,f2), (f2,f3), ... (f8,f9) and place the results in
 * %f48, %f50, ... %f62.  The nine arguments are double-precision FP
 * register names without the leading '%'.  The unrolled loops in
 * FUNC_NAME use the %f48 group as the source of the following block
 * store.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) #define FREG_FROB(f1, f2, f3, f4, f5, f6, f7, f8, f9) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) faligndata %f1, %f2, %f48; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) faligndata %f2, %f3, %f50; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) faligndata %f3, %f4, %f52; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) faligndata %f4, %f5, %f54; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) faligndata %f5, %f6, %f56; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) faligndata %f6, %f7, %f58; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) faligndata %f7, %f8, %f60; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) faligndata %f8, %f9, %f62;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
/* MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt): one step of the
 * unrolled block-copy loop.
 *   - block-load from [%src] into the FP group starting at %fdest
 *   - block-store the FP group starting at %fsrc to [%dest]
 *   - advance %src by 0x40
 *   - subtract 0x40 from %GLOBAL_SPARE, setting the condition codes
 *   - branch to jmptgt when that result is zero; the add that advances
 *     %dest by 0x40 follows the non-annulled branch, so it executes on
 *     both paths
 * Both memory accesses name U1_gs_80_fp as their fixup stub.
 * The branch tests a literal %xcc, not the XCC macro.
 * The final line ends in a backslash, so the definition also swallows
 * the line that follows it (blank in this file).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) #define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) add %src, 0x40, %src; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) be,pn %xcc, jmptgt; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) add %dest, 0x40, %dest; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87)
/* The three phases of the unrolled loop.  Each one stores from the %f48
 * group and differs only in which FP group the block load refills:
 * %f0 (LOOP_CHUNK1), %f16 (LOOP_CHUNK2) or %f32 (LOOP_CHUNK3).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) #define LOOP_CHUNK1(src, dest, branch_dest) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) #define LOOP_CHUNK2(src, dest, branch_dest) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) #define LOOP_CHUNK3(src, dest, branch_dest) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
/* DO_SYNC emits a membar #Sync.
 * STORE_SYNC(dest, fsrc): block-store the FP group starting at %fsrc to
 * [%dest] (fixup stub U1_gs_80_fp), advance %dest by 0x40, then issue
 * DO_SYNC.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) #define DO_SYNC membar #Sync;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) #define STORE_SYNC(dest, fsrc) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) add %dest, 0x40, %dest; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) DO_SYNC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
/* STORE_JUMP(dest, fsrc, target): block-store the FP group starting at
 * %fsrc to [%dest] (fixup stub U1_gs_40_fp), advance %dest by 0x40,
 * then branch unconditionally to target.  The instruction after the
 * branch is a nop.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) #define STORE_JUMP(dest, fsrc, target) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) add %dest, 0x40, %dest; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) ba,pt %xcc, target; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) nop;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106)
/* FINISH_VISCHUNK(dest, f0, f1): emit one more 8-byte doubleword while
 * the %g3 counter allows it.
 *   - subtract 8 from %g3; if the result is negative, branch forward to
 *     local label 95 (defined elsewhere in the file)
 *   - the faligndata of (%f0, %f1) into %f48 follows the non-annulled
 *     branch, so it executes on both paths
 *   - otherwise store %f48 to [%dest] (fixup stub U1_g3_8_fp) and
 *     advance %dest by 8
 * The parameters f0 and f1 are macro arguments, so the %f0 / %f1 tokens
 * in the body are replaced by whatever registers the caller passes.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) #define FINISH_VISCHUNK(dest, f0, f1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) subcc %g3, 8, %g3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) bl,pn %xcc, 95f; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) faligndata %f0, %f1, %f48; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) add %dest, 8, %dest;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113)
/* UNEVEN_VISCHUNK_LAST(dest, f0, f1): subtract 8 from %g3 and branch
 * forward to local label 95 if the result is negative.  The fsrc2 that
 * copies %f0 into %f1 follows the non-annulled branch, so it executes
 * on both paths.  The dest argument is accepted but not used in the
 * body.  f0 and f1 are macro arguments substituted into the %f0 / %f1
 * tokens.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) #define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) subcc %g3, 8, %g3; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) bl,pn %xcc, 95f; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) fsrc2 %f0, %f1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
/* UNEVEN_VISCHUNK(dest, f0, f1): UNEVEN_VISCHUNK_LAST followed by an
 * unconditional, annulled branch forward to local label 93 (defined
 * elsewhere in the file).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) #define UNEVEN_VISCHUNK(dest, f0, f1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) UNEVEN_VISCHUNK_LAST(dest, f0, f1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) ba,a,pt %xcc, 93f;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122)
/* Declare %g2 and %g3 to the assembler as scratch registers; both are
 * written by the code below.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) .register %g2,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) .register %g3,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) #ifndef EX_RETVAL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) #define EX_RETVAL(x) x
/* Fixup stub named by the byte-alignment loop in FUNC_NAME.
 * Runs VISExitHalf, then returns %o0 = %g1 + 1 + %g2 + %o2; the final
 * add sits in the retl delay slot.  In that loop %g1 has already been
 * decremented for the byte being accessed, %g2 holds the bytes left for
 * the 8-byte alignment phase, and %o2 has had the alignment bytes
 * pre-subtracted, so the sum is the number of bytes not yet copied.
 * This and the following U1_* stubs are emitted only when EX_RETVAL was
 * not defined by an including file.
 * NOTE(review): ENTRY, ENDPROC and VISExitHalf are not defined in this
 * file; presumably they come from linux/linkage.h and asm/visasm.h, so
 * a build without __KERNEL__ would need them from elsewhere -- confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) ENTRY(U1_g1_1_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) add %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) add %g1, %g2, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) add %g1, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) ENDPROC(U1_g1_1_fp)
/* Fixup stub named by the ldd loads of the 8-byte alignment loop.
 * Runs VISExitHalf, then returns %o0 = %g2 + %o2 (the add sits in the
 * retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) ENTRY(U1_g2_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) add %g2, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) ENDPROC(U1_g2_0_fp)
/* Fixup stub named by the std stores of the 8-byte alignment loop,
 * which run after %g2 has already been decremented by 8.
 * Runs VISExitHalf, then returns %o0 = %g2 + 8 + %o2 (the final add
 * sits in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) ENTRY(U1_g2_8_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) add %g2, 8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) add %g2, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) ENDPROC(U1_g2_8_fp)
/* Fixup stub named by the first two priming block loads in FUNC_NAME.
 * Runs VISExitHalf, then returns %o0 = %GLOBAL_SPARE + %g3 + %o2 (the
 * final add sits in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) ENTRY(U1_gs_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) add %GLOBAL_SPARE, %g3, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) add %o0, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) ENDPROC(U1_gs_0_fp)
/* Fixup stub named by MAIN_LOOP_CHUNK, STORE_SYNC and the third priming
 * block load, all of which run after %GLOBAL_SPARE has been reduced by
 * 0x80.  Runs VISExitHalf, then returns
 * %o0 = %GLOBAL_SPARE + 0x80 + %g3 + %o2 (the final add sits in the
 * retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) ENTRY(U1_gs_80_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) add %GLOBAL_SPARE, %g3, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) add %o0, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) ENDPROC(U1_gs_80_fp)
/* Fixup stub named by STORE_JUMP.
 * Runs VISExitHalf, then returns
 * %o0 = %GLOBAL_SPARE + 0x40 + %g3 + %o2 (the final add sits in the
 * retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) ENTRY(U1_gs_40_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) add %GLOBAL_SPARE, %g3, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) add %o0, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) ENDPROC(U1_gs_40_fp)
/* Fixup stub: runs VISExitHalf, then returns %o0 = %g3 + %o2 (the add
 * sits in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) ENTRY(U1_g3_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) add %g3, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) ENDPROC(U1_g3_0_fp)
/* Fixup stub named by the store in FINISH_VISCHUNK, which runs after
 * %g3 has already been decremented by 8.
 * Runs VISExitHalf, then returns %o0 = %g3 + 8 + %o2 (the final add
 * sits in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) ENTRY(U1_g3_8_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) add %g3, 8, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) add %g3, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) ENDPROC(U1_g3_8_fp)
/* Fixup stub: runs VISExitHalf, then returns %o0 = %o2 (the mov sits in
 * the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) ENTRY(U1_o2_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) mov %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) ENDPROC(U1_o2_0_fp)
/* Fixup stub: runs VISExitHalf, then returns %o0 = %o2 + 1 (the add
 * sits in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) ENTRY(U1_o2_1_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) add %o2, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) ENDPROC(U1_o2_1_fp)
/* Fixup stub: returns %o0 = %GLOBAL_SPARE + %o2 (the add sits in the
 * retl delay slot).  Although the name carries no _fp suffix, this stub
 * still runs VISExitHalf first.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) ENTRY(U1_gs_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) add %GLOBAL_SPARE, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) ENDPROC(U1_gs_0)
/* Fixup stub: returns %o0 = %GLOBAL_SPARE + %o2 + 0x8 (the final add
 * sits in the retl delay slot).  Although the name carries no _fp
 * suffix, this stub still runs VISExitHalf first.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) ENTRY(U1_gs_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) add %GLOBAL_SPARE, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) ENDPROC(U1_gs_8)
/* Fixup stub: returns %o0 = %GLOBAL_SPARE + %o2 + 0x10 (the final add
 * sits in the retl delay slot).  Although the name carries no _fp
 * suffix, this stub still runs VISExitHalf first.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) ENTRY(U1_gs_10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) add %GLOBAL_SPARE, 0x10, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) ENDPROC(U1_gs_10)
/* Fixup stub with no VIS teardown: returns %o0 = %o2 (the mov sits in
 * the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) ENTRY(U1_o2_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) mov %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) ENDPROC(U1_o2_0)
/* Fixup stub with no VIS teardown: returns %o0 = %o2 + 8 (the add sits
 * in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) ENTRY(U1_o2_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) add %o2, 8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) ENDPROC(U1_o2_8)
/* Fixup stub with no VIS teardown: returns %o0 = %o2 + 4 (the add sits
 * in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) ENTRY(U1_o2_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) add %o2, 4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) ENDPROC(U1_o2_4)
/* Fixup stub with no VIS teardown: returns %o0 = %o2 + 1 (the add sits
 * in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) ENTRY(U1_o2_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) add %o2, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) ENDPROC(U1_o2_1)
/* Fixup stub with no VIS teardown: returns %o0 = %g1 + %o2 (the add
 * sits in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) ENTRY(U1_g1_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) add %g1, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) ENDPROC(U1_g1_0)
/* Fixup stub with no VIS teardown: returns %o0 = %g1 + 1 + %o2 (the
 * final add sits in the retl delay slot).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) ENTRY(U1_g1_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) add %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) add %g1, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) ENDPROC(U1_g1_1)
/* Fixup stub with no VIS teardown: reduces %o2 to its low three bits
 * and returns %o0 = %GLOBAL_SPARE + (%o2 & 7).  The final add sits in
 * the retl delay slot.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) ENTRY(U1_gs_0_o2_adj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) and %o2, 7, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) add %GLOBAL_SPARE, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) ENDPROC(U1_gs_0_o2_adj)
/* Fixup stub with no VIS teardown: reduces %o2 to its low three bits
 * and returns %o0 = %GLOBAL_SPARE + 8 + (%o2 & 7).  The final add sits
 * in the retl delay slot.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) ENTRY(U1_gs_8_o2_adj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) and %o2, 7, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) add %GLOBAL_SPARE, 8, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) add %GLOBAL_SPARE, %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) ENDPROC(U1_gs_8_o2_adj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) .globl FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) .type FUNC_NAME,#function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) srlx %o2, 31, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) cmp %g2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) tne %xcc, 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) mov %o0, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) cmp %o2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) be,pn %XCC, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) or %o0, %o1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) cmp %o2, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) blu,a,pn %XCC, 80f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) or %o3, %o2, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) cmp %o2, (5 * 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) blu,pt %XCC, 70f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) andcc %o3, 0x7, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) /* Clobbers o5/g1/g2/g3/g7/icc/xcc. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) VISEntry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) /* Is 'dst' already aligned on a 64-byte boundary? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) andcc %o0, 0x3f, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) be,pt %XCC, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) /* Compute abs((dst & 0x3f) - 0x40) into %g2. This is the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) * of bytes to copy to make 'dst' 64-byte aligned. We pre-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) * subtract this from 'len'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) sub %o0, %o1, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) sub %g2, 0x40, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) sub %g0, %g2, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) sub %o2, %g2, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) andcc %g2, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) be,pt %icc, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) and %g2, 0x38, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) 1: subcc %g1, 0x1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) bgu,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) add %o1, 0x1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) add %o1, %GLOBAL_SPARE, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 2: cmp %g2, 0x0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) and %o1, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) be,pt %icc, 3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) alignaddr %o1, %g0, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) 1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) subcc %g2, 0x8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) faligndata %f4, %f6, %f0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) be,pn %icc, 3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) subcc %g2, 0x8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) faligndata %f6, %f4, %f0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) /* Destination is 64-byte aligned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) membar #LoadStore | #StoreStore | #StoreLoad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) subcc %o2, 0x40, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) add %o1, %g1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) andncc %GLOBAL_SPARE, (0x40 - 1), %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) srl %g1, 3, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) sub %o2, %GLOBAL_SPARE, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) andn %o1, (0x40 - 1), %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) and %g2, 7, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) andncc %g3, 0x7, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) fsrc2 %f0, %f2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) sub %g3, 0x8, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) sub %o2, %GLOBAL_SPARE, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) add %g1, %GLOBAL_SPARE, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) subcc %o2, %g3, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) add %o1, 0x40, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) add %g1, %g3, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) add %o1, 0x40, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) add %o1, 0x40, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) /* There are 8 instances of the unrolled loop,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) * one for each possible alignment of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) * source buffer. Each loop instance is 452
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) * bytes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) sll %g2, 3, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) sub %o3, %g2, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) sllx %o3, 4, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) add %o3, %g2, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) sllx %o3, 2, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 1: rd %pc, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) add %o3, %lo(1f - 1b), %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) jmpl %o3 + %g2, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) LOOP_CHUNK1(o1, o0, 1f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) LOOP_CHUNK2(o1, o0, 2f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) LOOP_CHUNK3(o1, o0, 3f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) ba,pt %xcc, 1b+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) faligndata %f0, %f2, %f48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) STORE_JUMP(o0, f48, 40f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) 2: FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) STORE_JUMP(o0, f48, 48f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) 3: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) STORE_JUMP(o0, f48, 56f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) LOOP_CHUNK1(o1, o0, 1f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) LOOP_CHUNK2(o1, o0, 2f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) LOOP_CHUNK3(o1, o0, 3f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) ba,pt %xcc, 1b+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) faligndata %f2, %f4, %f48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) STORE_JUMP(o0, f48, 41f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) 2: FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) STORE_JUMP(o0, f48, 49f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) 3: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) STORE_JUMP(o0, f48, 57f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) LOOP_CHUNK1(o1, o0, 1f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) LOOP_CHUNK2(o1, o0, 2f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) LOOP_CHUNK3(o1, o0, 3f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) ba,pt %xcc, 1b+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) faligndata %f4, %f6, %f48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) STORE_JUMP(o0, f48, 42f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) 2: FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) STORE_JUMP(o0, f48, 50f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) 3: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) STORE_JUMP(o0, f48, 58f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) LOOP_CHUNK1(o1, o0, 1f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) LOOP_CHUNK2(o1, o0, 2f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) LOOP_CHUNK3(o1, o0, 3f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) ba,pt %xcc, 1b+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) faligndata %f6, %f8, %f48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) STORE_JUMP(o0, f48, 43f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) 2: FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) STORE_JUMP(o0, f48, 51f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) 3: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) STORE_JUMP(o0, f48, 59f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) LOOP_CHUNK1(o1, o0, 1f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) LOOP_CHUNK2(o1, o0, 2f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) LOOP_CHUNK3(o1, o0, 3f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) ba,pt %xcc, 1b+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) faligndata %f8, %f10, %f48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) STORE_JUMP(o0, f48, 44f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) 2: FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) STORE_JUMP(o0, f48, 52f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) 3: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) STORE_JUMP(o0, f48, 60f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461)
/*
 * Unrolled copy-loop variant whose register rotation starts at %f10.
 * The three FREG_FROB/LOOP_CHUNKn stages use the 9-register windows
 * f10..f26, f26..f42 and f42..f10; the last register of each window is
 * the first register of the next.  The macros used here are defined
 * outside this section, so only call-site facts are described.
 * NOTE(review): the selection of this variant happens outside this
 * section -- presumably by source misalignment; confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) LOOP_CHUNK1(o1, o0, 1f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) LOOP_CHUNK2(o1, o0, 2f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) LOOP_CHUNK3(o1, o0, 3f)
/*
 * Re-enter the loop one instruction past the preceding label 1 (1b+4,
 * assuming the macros define no local label 1).  The delay-slot
 * faligndata uses the pair (f10, f12), matching the first two registers
 * of the FREG_FROB at that label -- presumably it replaces the skipped
 * instruction; confirm against the FREG_FROB definition.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) ba,pt %xcc, 1b+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) faligndata %f10, %f12, %f48
/*
 * Exit paths for the 1f/2f/3f arguments above: two further windows each,
 * STORE_SYNC after the first and STORE_JUMP after the second.  The
 * STORE_JUMP targets 45f/53f/61f are the finish entries whose first
 * register is f10, f26 and f42, i.e. the last register of the final
 * window on each path.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) STORE_JUMP(o0, f48, 45f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) 2: FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) STORE_JUMP(o0, f48, 53f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) 3: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) STORE_JUMP(o0, f48, 61f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482)
/*
 * Unrolled copy-loop variant whose register rotation starts at %f12.
 * The three FREG_FROB/LOOP_CHUNKn stages use the 9-register windows
 * f12..f28, f28..f44 and f44..f12; the last register of each window is
 * the first register of the next.  The macros used here are defined
 * outside this section, so only call-site facts are described.
 * NOTE(review): the selection of this variant happens outside this
 * section -- presumably by source misalignment; confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) LOOP_CHUNK1(o1, o0, 1f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) LOOP_CHUNK2(o1, o0, 2f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) LOOP_CHUNK3(o1, o0, 3f)
/*
 * Re-enter the loop one instruction past the preceding label 1 (1b+4,
 * assuming the macros define no local label 1).  The delay-slot
 * faligndata uses the pair (f12, f14), matching the first two registers
 * of the FREG_FROB at that label -- presumably it replaces the skipped
 * instruction; confirm against the FREG_FROB definition.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) ba,pt %xcc, 1b+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) faligndata %f12, %f14, %f48
/*
 * Exit paths for the 1f/2f/3f arguments above: two further windows each,
 * STORE_SYNC after the first and STORE_JUMP after the second.  The
 * STORE_JUMP targets 46f/54f/62f are the finish entries whose first
 * register is f12, f28 and f44, i.e. the last register of the final
 * window on each path.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) STORE_JUMP(o0, f48, 46f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) 2: FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) STORE_JUMP(o0, f48, 54f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) 3: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) STORE_JUMP(o0, f48, 62f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503)
/*
 * Unrolled copy-loop variant whose register rotation starts at %f14.
 * The three FREG_FROB/LOOP_CHUNKn stages use the 9-register windows
 * f14..f30, f30..f46 and f46..f14; the last register of each window is
 * the first register of the next.  The macros used here are defined
 * outside this section, so only call-site facts are described.
 * NOTE(review): the selection of this variant happens outside this
 * section -- presumably by source misalignment; confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) LOOP_CHUNK1(o1, o0, 1f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) LOOP_CHUNK2(o1, o0, 2f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) LOOP_CHUNK3(o1, o0, 3f)
/*
 * Re-enter the loop one instruction past the preceding label 1 (1b+4,
 * assuming the macros define no local label 1).  The delay-slot
 * faligndata uses the pair (f14, f16), matching the first two registers
 * of the FREG_FROB at that label -- presumably it replaces the skipped
 * instruction; confirm against the FREG_FROB definition.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) ba,pt %xcc, 1b+4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) faligndata %f14, %f16, %f48
/*
 * Exit paths for the 1f/2f/3f arguments above: two further windows each,
 * STORE_SYNC after the first and STORE_JUMP after the second.  The
 * STORE_JUMP targets 47f/55f/63f are the finish entries whose first
 * register is f14, f30 and f46; those three entries are the ones that
 * use the UNEVEN_VISCHUNK / UNEVEN_VISCHUNK_LAST macros.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) STORE_JUMP(o0, f48, 47f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) 2: FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) STORE_JUMP(o0, f48, 55f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) 3: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) STORE_SYNC(o0, f48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) STORE_JUMP(o0, f48, 63f)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524)
/*
 * Finish entries, labels 40-63.  Entry 40+k takes double register f(2k)
 * as its first register argument (40 -> f0, 41 -> f2, ... 63 -> f46).
 * These labels are the targets named by the STORE_JUMP invocations in
 * the unrolled loops; labels 44-47, 52-55 and 60-63 are referenced by
 * the variants directly above, the others presumably by variants earlier
 * in the file.
 *
 * Every eighth entry (47, 55, 63; first register f14, f30, f46) uses
 * UNEVEN_VISCHUNK or UNEVEN_VISCHUNK_LAST with f0 as the second register
 * instead of FINISH_VISCHUNK with the next register up.
 * NOTE(review): the macro bodies are not visible here; whether an entry
 * falls through to the next one or branches away (presumably towards
 * the 93/95 tail code below) must be confirmed against their definitions.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) 40: FINISH_VISCHUNK(o0, f0, f2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) 41: FINISH_VISCHUNK(o0, f2, f4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) 42: FINISH_VISCHUNK(o0, f4, f6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) 43: FINISH_VISCHUNK(o0, f6, f8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) 44: FINISH_VISCHUNK(o0, f8, f10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) 45: FINISH_VISCHUNK(o0, f10, f12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) 46: FINISH_VISCHUNK(o0, f12, f14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) 47: UNEVEN_VISCHUNK(o0, f14, f0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) 48: FINISH_VISCHUNK(o0, f16, f18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) 49: FINISH_VISCHUNK(o0, f18, f20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) 50: FINISH_VISCHUNK(o0, f20, f22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) 51: FINISH_VISCHUNK(o0, f22, f24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) 52: FINISH_VISCHUNK(o0, f24, f26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) 53: FINISH_VISCHUNK(o0, f26, f28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) 54: FINISH_VISCHUNK(o0, f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) 55: UNEVEN_VISCHUNK(o0, f30, f0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) 56: FINISH_VISCHUNK(o0, f32, f34)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) 57: FINISH_VISCHUNK(o0, f34, f36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) 58: FINISH_VISCHUNK(o0, f36, f38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) 59: FINISH_VISCHUNK(o0, f38, f40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) 60: FINISH_VISCHUNK(o0, f40, f42)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) 61: FINISH_VISCHUNK(o0, f42, f44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) 62: FINISH_VISCHUNK(o0, f44, f46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) 63: UNEVEN_VISCHUNK_LAST(o0, f46, f0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549)
/*
 * 93: 8-byte tail loop using faligndata, unrolled twice so that %f0 and
 * %f2 alternate as "previous" and "newly loaded" doubleword.
 * Each half: load 8 bytes from %o1, advance %o1, subtract 8 from %g3,
 * merge the previous and new doublewords into %f8 and store it at %o0.
 * %o0 is advanced in the branch delay slot, so it moves on both the
 * taken and the not-taken path.
 * NOTE(review): %g3 is set up outside this section -- presumably the
 * remaining doubleword byte count biased so that a negative result
 * means "done"; confirm.  The second argument of EX_LD_FP/EX_ST_FP
 * presumably names the exception fixup for that access; confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) 93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) add %o1, 8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) subcc %g3, 8, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) faligndata %f0, %f2, %f8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
/* Leave for label 95 once the subcc above made %g3 negative. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) bl,pn %xcc, 95f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) add %o0, 8, %o0
/* Second half: same steps with the roles of %f0 and %f2 swapped. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) add %o1, 8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) subcc %g3, 8, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) faligndata %f2, %f0, %f8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp)
/* Keep looping while %g3 is still non-negative. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) bge,pt %xcc, 93b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) add %o0, 8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564)
/*
 * 95: skip the byte loop when %o2 is zero.  The delay-slot mov runs on
 * both paths and replaces %o1 with %g1.
 * NOTE(review): %g1 presumably holds the true (unaligned) source
 * position saved before the doubleword loop; confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) 95: brz,pt %o2, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) mov %g1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567)
/* Byte loop: copy %o2 bytes from %o1 to %o0, one per iteration. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) 1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) subcc %o2, 1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) bne,pt %xcc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) add %o0, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574)
/*
 * Exit of the VIS path: memory barrier, VISExit, then return.  The
 * return value is written to %o0 in the retl delay slot from
 * EX_RETVAL(%o4).
 * NOTE(review): %o4 presumably holds the original destination pointer
 * saved at function entry; confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) 2: membar #StoreLoad | #StoreStore
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) VISExit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) mov EX_RETVAL(%o4), %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579)
/*
 * Medium-length copy using integer registers only.
 * On entry the condition codes come from a test made outside this
 * section (NOTE(review): presumably an 8-byte alignment check of source
 * and destination; confirm).  "Not equal" goes to the alignment code at
 * label 75.  The delay slot runs on both paths and sets
 * %o3 = %o0 - %o1, so that %o1 + %o3 addresses the destination while
 * only %o1 is advanced.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) 70: /* 16 < len <= (5 * 64) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) bne,pn %XCC, 75f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) sub %o0, %o1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
/*
 * 72: split the length: %GLOBAL_SPARE = len rounded down to a multiple
 * of 16 (loop counter), %o2 = len & 0xf (remainder handled from 73 on).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) 72: andn %o2, 0xf, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) and %o2, 0xf, %o2
/*
 * 16 bytes per iteration: both doublewords are loaded first, then
 * stored.  %o1 is bumped by 8 between the two stores, so the second
 * store lands 8 bytes after the first, and by another 8 in the branch
 * delay slot.  The loop runs while the counter is still above zero.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) bgu,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) add %o1, 0x8, %o1
/* 73: if bit 3 of the remainder is set, copy one more doubleword. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) 73: andcc %o2, 0x8, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) sub %o2, 0x8, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) add %o1, 0x8, %o1
/* If bit 2 of the remainder is set, copy one 32-bit word. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602) 1: andcc %o2, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) sub %o2, 0x4, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) add %o1, 0x4, %o1
/*
 * Nothing left: return through label 85.  Otherwise finish the last
 * bytes in the byte loop at label 90, which also uses %o1 + %o3 as the
 * destination address.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) 1: cmp %o2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) be,pt %XCC, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) ba,pt %xcc, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614)
/*
 * 75: bring the destination to an 8-byte boundary, then pick a copy
 * strategy from the source alignment.  Reached from label 70 with
 * %o3 = %o0 - %o1, so %o1 + %o3 addresses the destination.
 *
 * andcc sets the flags from dst & 7.  The plain sub that follows does
 * not touch them, so the be still tests dst & 7 == 0.  After the
 * delay-slot negate, %g1 = 8 - (dst & 7), the number of bytes needed to
 * reach alignment.  That value is only used on the fall-through path,
 * where it is also subtracted from the length in %o2.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) 75: andcc %o0, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) sub %g1, 0x8, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) be,pn %icc, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) sub %g0, %g1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) sub %o2, %g1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620)
/* Copy %g1 single bytes; %o1 advances in the branch delay slot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) 1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) subcc %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) bgu,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626)
/*
 * 2: rebuild the destination pointer in %o0 from the advanced source
 * pointer, then test src & 7.  If the source is misaligned, go to
 * label 8; the delay slot turns the byte misalignment in %g1 into a
 * bit count (times 8).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) 2: add %o1, %o3, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) andcc %o1, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) bne,pt %icc, 8f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) sll %g1, 3, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631)
/*
 * Source and destination are now both 8-byte aligned: reuse the 16-byte
 * loop at 72 when at least 16 bytes remain, otherwise go straight to
 * the remainder handling at 73.  The final branch is annulled (",a"),
 * so it has no delay-slot instruction.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) cmp %o2, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) bgeu,pt %icc, 72b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) ba,a,pt %xcc, 73b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636)
/*
 * 8: shift-and-merge copy for a misaligned source and an 8-byte aligned
 * destination.  On entry %g1 holds the source misalignment in bits
 * (set in the delay slot of the branch that leads here).
 *
 * Setup: round %o1 down to a doubleword boundary and load the first
 * doubleword into %g2.  %o3 becomes 64 - %g1, the complementary shift
 * count.  %GLOBAL_SPARE is the length rounded down to a multiple of 8
 * (loop counter).  %g2 is pre-shifted left by %g1.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637) 8: mov 64, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) andn %o1, 0x7, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) sub %o3, %g1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) andn %o2, 0x7, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) sllx %g2, %g1, %g2
/*
 * Per iteration: load the next aligned doubleword into %g3, OR its high
 * part (shifted right by %o3) with the carried-over %g2 and store the
 * result at %o0.  The delay slot prepares the next carry:
 * %g2 = %g3 << %g1.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) 1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) srlx %g3, %o3, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) or %o5, %g2, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648) EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650) bgu,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) sllx %g3, %g1, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652)
/*
 * Convert %g1 back from bits to bytes and keep only the sub-doubleword
 * remainder in %o2.  The first delay slot, which runs on both paths,
 * adds the byte offset back to %o1 so that it is again the real source
 * position.  With no remainder, return through label 85.  Otherwise
 * enter the byte loop at label 90 with %o3 = %o0 - %o1, as that loop
 * stores to %o1 + %o3.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) srl %g1, 3, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) andcc %o2, 0x7, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) be,pn %icc, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) add %o1, %g1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) ba,pt %xcc, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) sub %o0, %o1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659)
/*
 * Short copy.  If either of the low two bits of %o3 is set, fall back
 * to the byte loop at label 90; otherwise copy 32-bit words.  The delay
 * slot runs on both paths and replaces %o3 with %o0 - %o1, so that
 * %o1 + %o3 addresses the destination.
 * NOTE(review): the incoming %o3 is computed outside this section --
 * presumably the OR of source, destination and length, which would
 * also guarantee that %o2 is a multiple of 4 in the word loop; confirm.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) 80: /* 0 < len <= 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) andcc %o3, 0x3, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) bne,pn %XCC, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664) sub %o0, %o1, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665)
/*
 * Word loop: 4 bytes per iteration while %o2 is still above zero after
 * the subtraction; %o1 advances in the branch delay slot.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) 1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 667) subcc %o2, 4, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 668) EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 669) bgu,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 670) add %o1, 4, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 671)
/*
 * 85: common return for the integer-only paths.  The return value is
 * placed in %o0 from EX_RETVAL(%o4) in the retl delay slot.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 672) 85: retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 673) mov EX_RETVAL(%o4), %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 674)
/*
 * 90: byte-at-a-time copy of %o2 bytes.  Expects %o3 = dst - src, so
 * each byte loaded from %o1 is stored at %o1 + %o3; %o1 advances in the
 * branch delay slot.  The loop always copies at least one byte before
 * testing the count, so callers must arrive with %o2 != 0.  Returns
 * with %o0 set from EX_RETVAL(%o4) in the retl delay slot.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 675) .align 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 676) 90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 677) subcc %o2, 1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 678) EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 679) bgu,pt %XCC, 90b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 680) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 681) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 682) mov EX_RETVAL(%o4), %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 683)
/* Record the symbol size of FUNC_NAME and export it. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 684) .size FUNC_NAME, .-FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 685) EXPORT_SYMBOL(FUNC_NAME)