^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* NG2memcpy.S: Niagara-2 optimized memcpy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2007 David S. Miller (davem@davemloft.net)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
/* Build setup. Kernel builds include the linkage, visasm and asi headers and use %g7 as GLOBAL_SPARE. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #ifdef __KERNEL__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/visasm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <asm/asi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #define GLOBAL_SPARE %g7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #else
/* Non-kernel builds define the ASI numbers, FPRS_FEF and the VIS entry/exit macros locally. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #define ASI_PNF 0x82
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #define ASI_BLK_P 0xf0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #define ASI_BLK_INIT_QUAD_LDD_P 0xe2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #define FPRS_FEF 0x04
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #ifdef MEMCPY_DEBUG
/* Debug VISEntryHalf: saves %fprs in %o5 and writes FPRS_FEF to %fprs, then clears %g1, %g2, %g3, %g5 and sets the condition codes with subcc %g0, %g0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
/* VISExitHalf: masks the %fprs value saved in %o5 with FPRS_FEF and writes the result back to %fprs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #else
/* Non-debug VISEntryHalf: saves %fprs in %o5 and writes FPRS_FEF to %fprs; %o5 must stay live until VISExitHalf. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) #define VISEntryHalf rd %fprs, %o5; wr %g0, FPRS_FEF, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #endif
/* Non-kernel builds use %g5 as GLOBAL_SPARE. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) #define GLOBAL_SPARE %g5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27)
/* STORE_ASI: ASI used by STORE_INIT. Overridable; defaults to ASI_BLK_INIT_QUAD_LDD_P, */
/* or to 0x80 (ASI_P) when SIMULATE_NIAGARA_ON_NON_NIAGARA is defined. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) #ifndef STORE_ASI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define STORE_ASI 0x80 /* ASI_P */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
/* EX_LD / EX_LD_FP: wrappers around load instructions. x is the instruction, y names an NG2_retl_* label. */
/* The defaults here emit x unchanged and ignore y; both macros may be pre-defined before this point. */
/* NOTE(review): presumably an including file overrides these to register y as a fault fixup target -- confirm. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #ifndef EX_LD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) #define EX_LD(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #ifndef EX_LD_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define EX_LD_FP(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42)
/* EX_ST / EX_ST_FP: wrappers around store instructions. x is the instruction, y names an NG2_retl_* label. */
/* The defaults here emit x unchanged and ignore y; both macros may be pre-defined before this point. */
/* NOTE(review): presumably an including file overrides these to register y as a fault fixup target -- confirm. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #ifndef EX_ST
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #define EX_ST(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #ifndef EX_ST_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #define EX_ST_FP(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49)
/* LOAD(type, addr, dest): overridable; by default emits "type [addr], dest". Also used with type = prefetch. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #ifndef LOAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #define LOAD(type,addr,dest) type [addr], dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
/* LOAD_BLK(addr, dest): overridable; by default an ldda from addr through ASI_BLK_P into the FP registers starting at dest. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #ifndef LOAD_BLK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #define LOAD_BLK(addr,dest) ldda [addr] ASI_BLK_P, dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
/* STORE(type, src, addr): overridable; by default emits "type src, [addr]". */
/* With MEMCPY_DEBUG the default becomes the alternate-space form (type##a) using ASI 0x80. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #ifndef STORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #ifndef MEMCPY_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #define STORE(type,src,addr) type src, [addr]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) #define STORE(type,src,addr) type##a src, [addr] 0x80
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65)
/* STORE_BLK(src, addr): overridable; by default an stda of the FP registers starting at src to addr through ASI_BLK_P. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) #ifndef STORE_BLK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) #define STORE_BLK(src,addr) stda src, [addr] ASI_BLK_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
/* STORE_INIT(src, addr): overridable; by default an stxa of src to addr through STORE_ASI. */
/* The block-copy loops issue it with %g0 on each destination block just before the block store. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) #ifndef STORE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) #define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73)
/* FUNC_NAME: symbol under which the copy routine is emitted; overridable, defaults to NG2memcpy. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) #ifndef FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) #define FUNC_NAME NG2memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
/* PREAMBLE: optional code expanded near the start of FUNC_NAME, right after the length check; empty by default. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) #ifndef PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) #define PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
/* XCC: condition-code register name used as %XCC in the length-based branches; overridable, defaults to xcc. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) #ifndef XCC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) #define XCC xcc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
/* FREG_FROB(x0..x8): eight faligndata operations over the adjacent pairs (x0,x1) ... (x7,x8), */
/* writing the results to %f0, %f2, ... %f14. Arguments are FP register names without the leading '%'. */
/* Every result lands in %f0-%f14, so callers pass %f0-%f14 only in positions that are read before being overwritten. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) #define FREG_FROB(x0, x1, x2, x3, x4, x5, x6, x7, x8) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) faligndata %x0, %x1, %f0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) faligndata %x1, %x2, %f2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) faligndata %x2, %x3, %f4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) faligndata %x3, %x4, %f6; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) faligndata %x4, %x5, %f8; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) faligndata %x5, %x6, %f10; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) faligndata %x6, %x7, %f12; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) faligndata %x7, %x8, %f14;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
/* FREG_MOVE_n(x0..x(n-1)): copies n FP double registers with fsrc2 into %f0, %f2, ... in argument order. */
/* Arguments are FP register names without the leading '%'. */
/* The block-copy loops use these after each block store to move the trailing registers of the */
/* just-loaded block (%f16-%f30 range) down to the low registers for the next iteration. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) #define FREG_MOVE_1(x0) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) fsrc2 %x0, %f0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) #define FREG_MOVE_2(x0, x1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) fsrc2 %x0, %f0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) fsrc2 %x1, %f2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) #define FREG_MOVE_3(x0, x1, x2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) fsrc2 %x0, %f0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) fsrc2 %x1, %f2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) fsrc2 %x2, %f4;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) #define FREG_MOVE_4(x0, x1, x2, x3) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) fsrc2 %x0, %f0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) fsrc2 %x1, %f2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) fsrc2 %x2, %f4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) fsrc2 %x3, %f6;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) #define FREG_MOVE_5(x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) fsrc2 %x0, %f0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) fsrc2 %x1, %f2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) fsrc2 %x2, %f4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) fsrc2 %x3, %f6; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) fsrc2 %x4, %f8;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) #define FREG_MOVE_6(x0, x1, x2, x3, x4, x5) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) fsrc2 %x0, %f0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) fsrc2 %x1, %f2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) fsrc2 %x2, %f4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) fsrc2 %x3, %f6; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) fsrc2 %x4, %f8; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) fsrc2 %x5, %f10;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) #define FREG_MOVE_7(x0, x1, x2, x3, x4, x5, x6) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) fsrc2 %x0, %f0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) fsrc2 %x1, %f2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) fsrc2 %x2, %f4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) fsrc2 %x3, %f6; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) fsrc2 %x4, %f8; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) fsrc2 %x5, %f10; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) fsrc2 %x6, %f12;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) #define FREG_MOVE_8(x0, x1, x2, x3, x4, x5, x6, x7) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) fsrc2 %x0, %f0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) fsrc2 %x1, %f2; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) fsrc2 %x2, %f4; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) fsrc2 %x3, %f6; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) fsrc2 %x4, %f8; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) fsrc2 %x5, %f10; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) fsrc2 %x6, %f12; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) fsrc2 %x7, %f14;
/* FREG_LOAD_n(base, x0..x(n-1)): n ldd loads from base + 0x00, base + 0x08, ... (8-byte steps) into the named FP registers. */
/* Each load is wrapped in EX_LD_FP with NG2_retl_o2_plus_g1 as its label argument. */
/* Register arguments are given without the leading '%'. FREG_LOAD_1 has no trailing ';', unlike the others. */
/* The block-copy paths use these to preload the doublewords that precede the first 64-byte-aligned source block. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) #define FREG_LOAD_1(base, x0) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) #define FREG_LOAD_2(base, x0, x1) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) #define FREG_LOAD_3(base, x0, x1, x2) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) #define FREG_LOAD_4(base, x0, x1, x2, x3) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175)
/* Declare %g2 and %g3 as scratch registers; the copy routine uses both as temporaries. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) .register %g2,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) .register %g3,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) .text
/* Out-of-line return stubs, emitted only when EX_RETVAL has not already been defined. */
/* Each NG2_retl_* stub computes a value into %o0 (last step in the branch delay slot) and jumps to a shared tail. */
/* Stubs whose names end in _fp go through __restore_fp; all others go straight to __restore_asi. */
/* NOTE(review): these labels are the second argument of the EX_LD/EX_ST wrappers; presumably they are fault */
/* fixup targets and %o0 is the number of bytes left uncopied -- confirm against the files that override EX_*. */
/* NOTE(review): the copy loops pass NG2_retl_o2_plus_g1 and NG2_retl_o2_plus_g1_plus_64 (no _fp suffix) to */
/* EX_LD_FP/EX_ST_FP; presumably the overriding macros append "_fp" -- confirm. */
/* NOTE(review): ENTRY, ENDPROC and ASI_AIUS are not defined in the non-kernel branch of this file -- confirm that build still assembles. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) #ifndef EX_RETVAL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) #define EX_RETVAL(x) x
/* __restore_fp: run VISExitHalf (restores %fprs from %o5), then fall through into __restore_asi. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) __restore_fp:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) VISExitHalf
/* __restore_asi: return to the caller, writing ASI_AIUS to %asi in the retl delay slot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) __restore_asi:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) wr %g0, ASI_AIUS, %asi
/* %o0 = %o2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) ENTRY(NG2_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) mov %o2, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) ENDPROC(NG2_retl_o2)
/* %o0 = %o2 + 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) ENTRY(NG2_retl_o2_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) add %o2, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) ENDPROC(NG2_retl_o2_plus_1)
/* %o0 = %o2 + 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) ENTRY(NG2_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) add %o2, 4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) ENDPROC(NG2_retl_o2_plus_4)
/* %o0 = %o2 + 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) ENTRY(NG2_retl_o2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) add %o2, 8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) ENDPROC(NG2_retl_o2_plus_8)
/* %o0 = %o2 + %o4 + 1 (%o4 is incremented in place) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) ENTRY(NG2_retl_o2_plus_o4_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) add %o4, 1, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) add %o2, %o4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) ENDPROC(NG2_retl_o2_plus_o4_plus_1)
/* %o0 = %o2 + %o4 + 8 (%o4 is modified in place) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) ENTRY(NG2_retl_o2_plus_o4_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) add %o4, 8, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) add %o2, %o4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) ENDPROC(NG2_retl_o2_plus_o4_plus_8)
/* %o0 = %o2 + %o4 + 16 (%o4 is modified in place) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) ENTRY(NG2_retl_o2_plus_o4_plus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) add %o4, 16, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) add %o2, %o4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) ENDPROC(NG2_retl_o2_plus_o4_plus_16)
/* %o0 = %o2 + %g1, leaving through __restore_fp */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) ENTRY(NG2_retl_o2_plus_g1_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) ba,pt %xcc, __restore_fp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) add %o2, %g1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) ENDPROC(NG2_retl_o2_plus_g1_fp)
/* %o0 = %o2 + %g1 + 64, leaving through __restore_fp (%g1 is modified in place) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) ENTRY(NG2_retl_o2_plus_g1_plus_64_fp)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) add %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) ba,pt %xcc, __restore_fp
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) add %o2, %g1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp)
/* %o0 = %o2 + %g1 + 1 (%g1 is modified in place) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) ENTRY(NG2_retl_o2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) add %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) add %o2, %g1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) ENDPROC(NG2_retl_o2_plus_g1_plus_1)
/* %o0 = (%o2 & 7) + %o4 (%o2 is masked in place) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) ENTRY(NG2_retl_o2_and_7_plus_o4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) and %o2, 7, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) add %o2, %o4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) ENDPROC(NG2_retl_o2_and_7_plus_o4)
/* %o0 = (%o2 & 7) + %o4 + 8 (%o2 and %o4 are modified in place) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) and %o2, 7, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) add %o4, 8, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) ba,pt %xcc, __restore_asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) add %o2, %o4, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) .globl FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) .type FUNC_NAME,#function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) srlx %o2, 31, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) cmp %g2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) tne %xcc, 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) mov %o0, %o3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) cmp %o2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) be,pn %XCC, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) or %o0, %o1, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) cmp %o2, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) blu,a,pn %XCC, 80f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) or GLOBAL_SPARE, %o2, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) /* 2 blocks (128 bytes) is the minimum we can do the block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) * copy with. We need to ensure that we'll iterate at least
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) * once in the block copy loop. At worst we'll need to align
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) * the destination to a 64-byte boundary which can chew up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) * to (64 - 1) bytes from the length before we perform the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) * block copy loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) * However, the cut-off point, performance wise, is around
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) * 4 64-byte blocks.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) cmp %o2, (4 * 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) blu,pt %XCC, 75f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) andcc GLOBAL_SPARE, 0x7, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) /* %o0: dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) * %o1: src
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) * %o2: len (known to be >= 4 * 64 = 256 here, before dst alignment)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) * The block copy loops can use %o4, %g2, %g3 as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) * temporaries while copying the data. %o5 must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) * be preserved between VISEntryHalf and VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) LOAD(prefetch, %o1 + 0x000, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) LOAD(prefetch, %o1 + 0x040, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) LOAD(prefetch, %o1 + 0x080, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) /* Align destination on 64-byte boundary. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) andcc %o0, (64 - 1), %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) be,pt %XCC, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) sub %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) sub %g0, %o4, %o4 ! bytes to align dst
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) sub %o2, %o4, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) 1: subcc %o4, 1, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) bne,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) add %o0, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) /* Clobbers o5/g1/g2/g3/g7/icc/xcc. We must preserve
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) * o5 from here until we hit VISExitHalf.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) VISEntryHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) membar #Sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) alignaddr %o1, %g0, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) add %o1, (64 - 1), %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) andn %o4, (64 - 1), %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) andn %o2, (64 - 1), %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) sub %o2, %g1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) and %o1, (64 - 1), %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) add %o1, %g1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) sub %o0, %o4, %g3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) brz,pt %g2, 190f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) cmp %g2, 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) blu,a 5f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) cmp %g2, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) cmp %g2, 48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) blu,a 4f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) cmp %g2, 40
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) cmp %g2, 56
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) blu 170f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) ba,a,pt %xcc, 180f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) 4: /* 32 <= low bits < 48 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) blu 150f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) ba,a,pt %xcc, 160f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) 5: /* 0 < low bits < 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) blu,a 6f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) cmp %g2, 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) cmp %g2, 24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) blu 130f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) ba,a,pt %xcc, 140f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) 6: /* 0 < low bits < 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) bgeu 120f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) /* fall through for 0 < low bits < 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) 110: sub %o4, 64, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) bne,pt %xcc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) ba,pt %xcc, 195f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) 120: sub %o4, 56, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) bne,pt %xcc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) ba,pt %xcc, 195f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) 130: sub %o4, 48, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) FREG_MOVE_6(f20, f22, f24, f26, f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) bne,pt %xcc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) ba,pt %xcc, 195f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) 140: sub %o4, 40, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) FREG_LOAD_5(%g2, f0, f2, f4, f6, f8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) FREG_MOVE_5(f22, f24, f26, f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) bne,pt %xcc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) ba,pt %xcc, 195f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) 150: sub %o4, 32, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) FREG_LOAD_4(%g2, f0, f2, f4, f6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) FREG_MOVE_4(f24, f26, f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) bne,pt %xcc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) ba,pt %xcc, 195f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) 160: sub %o4, 24, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) FREG_LOAD_3(%g2, f0, f2, f4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423) FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) FREG_MOVE_3(f26, f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) bne,pt %xcc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) ba,pt %xcc, 195f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433) 170: sub %o4, 16, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) FREG_LOAD_2(%g2, f0, f2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) FREG_MOVE_2(f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) bne,pt %xcc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) ba,pt %xcc, 195f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446)
/*
 * Label 180: one variant of the block-copy loop.
 * Setup: %g2 = %o4 - 8 is handed to FREG_LOAD_1 together with f0.
 * FREG_LOAD_1, FREG_FROB, FREG_MOVE_1, LOAD_BLK, STORE_BLK, STORE_INIT,
 * LOAD and the EX_*_FP wrappers are macros defined outside this chunk.
 * NOTE(review): FREG_LOAD_1 presumably preloads the doubleword just
 * below %o4 -- confirm against the macro definition.
 *
 * Per pass (local label 1): STORE_INIT on %o4 + %g3, LOAD_BLK from %o4
 * into %f16, FREG_FROB over f0, f16..f30, STORE_BLK of %f0 to
 * %o4 + %g3, then FREG_MOVE_1(f30). %g1 drops by 64 and %o4 advances
 * by 64; the loop repeats until %g1 reaches zero, after which control
 * goes to label 195.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) 180: sub %o4, 8, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) FREG_LOAD_1(%g2, f0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) FREG_MOVE_1(f30)
/* subcc produces the condition codes tested by the bne below; the add in between does not modify them. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) bne,pt %xcc, 1b
/* Branch delay slot (no annul bit): the prefetch at %o4 + 64 is issued on every pass, including the last. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) ba,pt %xcc, 195f
/* Delay slot of the unconditional branch to 195. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460)
/*
 * Label 190: block-copy loop variant with no FREG_* shuffling: the
 * block loaded into %f0 is stored straight back out from %f0.
 * Per pass: STORE_INIT on %o4 + %g3, %g1 -= 64, LOAD_BLK from %o4 into
 * %f0, STORE_BLK of %f0 to %o4 + %g3, %o4 += 64. The loop repeats until
 * %g1 reaches zero and then falls through to label 195.
 * NOTE(review): %g1 is decremented before the block load/store here,
 * which is presumably why those two use the ..._plus_64 fixup labels
 * while STORE_INIT uses the plain one -- confirm against the fixup code.
 * LOAD_BLK, STORE_BLK, STORE_INIT, LOAD and EX_*_FP are macros defined
 * outside this chunk.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) 190:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) 1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1)
/* Sets the condition codes consumed by the bne at the bottom of the loop. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) subcc %g1, 64, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) add %o4, 64, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) bne,pt %xcc, 1b
/* Branch delay slot (no annul bit): the prefetch at %o4 + 64 is issued on every pass, including the last. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) LOAD(prefetch, %o4 + 64, #one_read)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469)
/*
 * Label 195: common exit of the block-copy loops above.
 * Sets %o0 = %o4 + %g3 (the address form the loops used for their
 * stores), issues membar #Sync, and runs VISExitHalf. In the non-kernel
 * build VISExitHalf is defined at the top of this file as writing
 * %o5 & FPRS_FEF back to %fprs; the kernel build takes it from
 * <asm/visasm.h>.
 * Then dispatches on the remaining byte count in %o2: zero goes to the
 * return at 85, anything else goes to the byte loop at 90.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) 195:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) add %o4, %g3, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) membar #Sync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) /* %o2 contains any final bytes still needed to be copied
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) * over. If anything is left, we copy it one byte at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) brz,pt %o2, 85f
/* Delay slot of brz (no annul bit): GLOBAL_SPARE = %o0 - %o1 is computed on both paths; the loop at 90 stores to %o1 + GLOBAL_SPARE. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) sub %o0, %o1, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) ba,a,pt %XCC, 90f
/* The ba above has the annul bit set, so this delay-slot nop is not executed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483)
/*
 * 16 < len <= 64 path, 8-byte copy variant.
 * The bne right after the label consumes condition codes produced
 * before this label (outside this chunk); when taken, control goes to
 * the next label 75 further down. Its delay slot runs on both paths and
 * sets GLOBAL_SPARE = %o0 - %o1, so that %o1 + GLOBAL_SPARE is the
 * store address matching the load address %o1.
 * LOAD, STORE, EX_LD and EX_ST are macros defined outside this chunk;
 * NOTE(review): the second EX_* argument looks like the fault fixup
 * label -- confirm against the macro definitions.
 * Exits to 85 (return) when nothing is left, or to 90 (byte loop) for
 * a remaining tail.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) 75: /* 16 < len <= 64 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) bne,pn %XCC, 75f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) sub %o0, %o1, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488)
/* 72: split the count into %o4 = %o2 & ~0xf (bytes handled 16 at a time) and %o2 = %o2 & 0xf (tail). Also re-entered from the code after label 2 below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) 72:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) andn %o2, 0xf, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) and %o2, 0xf, %o2
/* 16 bytes per pass as two ldx/stx pairs. %o1 is stepped +8, -8, +8 and +8 (delay slot), ending the pass 16 higher. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) 1: subcc %o4, 0x10, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) add %o1, 0x08, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) sub %o1, 0x08, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8)
/* Tests the flags from the subcc at the top of the loop: repeat while %o4 is still above zero. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) bgu,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) add %o1, 0x8, %o1
/* 73: if bit 3 of the tail count is set, copy one more 8-byte unit. Also entered directly from the code after label 2 below. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) 73: andcc %o2, 0x8, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) sub %o2, 0x8, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) add %o1, 0x8, %o1
/* If bit 2 of the tail count is set, copy one 4-byte unit with lduw/stw. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) 1: andcc %o2, 0x4, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) be,pt %XCC, 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) sub %o2, 0x4, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) add %o1, 0x4, %o1
/* Nothing left: return via 85. Otherwise finish the remaining %o2 bytes in the byte loop at 90. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) 1: cmp %o2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) be,pt %XCC, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) ba,pt %xcc, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521)
/*
 * Second label 75: reached from the bne under the "16 < len <= 64"
 * label above, whose delay slot has already set
 * GLOBAL_SPARE = %o0 - %o1.
 * Step 1: if %o0 has any of its low three bits set, copy
 * 8 - (%o0 & 7) bytes one at a time and reduce %o2 by that amount.
 * Step 2 (label 2): recompute %o0 = %o1 + GLOBAL_SPARE. If %o1 now has
 * its low three bits clear, reuse the 8-byte code at 72 (when
 * %o2 >= 16) or 73. Otherwise go to label 8.
 * Step 3 (label 8): round %o1 down to a multiple of 8 and build each
 * stored doubleword from two consecutive loaded doublewords with a
 * shift/merge, for %o2 & ~7 bytes. Any remaining %o2 & 7 bytes are
 * finished in the byte loop at 90.
 * LOAD, STORE, EX_LD and EX_ST are macros defined outside this chunk.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) 75:
/* The be below tests the flags from this andcc (the plain sub does not modify them): taken when %o0 & 7 == 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) andcc %o0, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) sub %g1, 0x8, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) be,pn %icc, 2f
/* Delay slot: %g1 = 8 - (%o0 & 7), the number of leading bytes to copy singly. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) sub %g0, %g1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) sub %o2, %g1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528)
/* Byte loop: one ldub/stb per pass, %g1 counts down to zero, %o1 advances in the delay slot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) 1: subcc %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531) EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) bgu,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534)
/* 2: %o0 = current store address. %g1 = %o1 & 7; if non-zero go to 8 with %g1 converted to a bit count (<< 3) in the delay slot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) 2: add %o1, GLOBAL_SPARE, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) andcc %o1, 0x7, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) bne,pt %icc, 8f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) sll %g1, 3, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539)
/* %o1 has its low three bits clear: with at least 16 bytes left use the 16-byte loop at 72, otherwise go straight to the tail code at 73 (annulled branch, no delay slot executed). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) cmp %o2, 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) bgeu,pt %icc, 72b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) ba,a,pt %xcc, 73b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544)
/*
 * 8: shift/merge copy. On entry %g1 = (%o1 & 7) * 8.
 * GLOBAL_SPARE is reused as the complementary shift 64 - %g1,
 * %o4 = %o2 & ~7 is the byte count for this loop, and %g2 carries the
 * previously loaded doubleword already shifted left by %g1.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) 8: mov 64, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) andn %o1, 0x7, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) sub GLOBAL_SPARE, %g1, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) andn %o2, 0x7, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) sllx %g2, %g1, %g2
/* Per pass: load the next doubleword into %g3, store (%g3 >> GLOBAL_SPARE) | %g2 to %o0, advance %o0 by 8, and in the delay slot keep %g3 << %g1 as the carry for the next pass. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) 1: add %o1, 0x8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) subcc %o4, 0x8, %o4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) srlx %g3, GLOBAL_SPARE, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) or %o5, %g2, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) add %o0, 0x8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) bgu,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) sllx %g3, %g1, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560)
/* Convert %g1 back from bits to bytes and add it to %o1 (delay slot, runs on both paths), undoing the earlier round-down. %o2 becomes the 0..7 byte tail: zero returns via 85, otherwise GLOBAL_SPARE is rebuilt as %o0 - %o1 for the byte loop at 90. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) srl %g1, 3, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) andcc %o2, 0x7, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) be,pn %icc, 85f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) add %o1, %g1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) ba,pt %xcc, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) sub %o0, %o1, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567)
/*
 * 0 < len <= 16 path, followed by the shared return at label 85.
 * On entry GLOBAL_SPARE holds a value computed before this chunk
 * (NOTE(review): presumably an OR of the two pointers and the length --
 * confirm at the function entry). If either of its two low bits is set,
 * control goes to the byte loop at 90. The delay slot of that branch
 * runs on both paths and sets GLOBAL_SPARE = %o0 - %o1.
 * Otherwise the loop at local label 1 copies 4 bytes per pass with
 * lduw/stw, doing %o2 -= 4 and (in the delay slot) %o1 += 4, for as
 * long as %o2 stays above zero, and then falls into 85.
 * LOAD, STORE, EX_LD, EX_ST and EX_RETVAL are macros defined outside
 * this chunk.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) .align 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) 80: /* 0 < len <= 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) andcc GLOBAL_SPARE, 0x3, %g0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) bne,pn %XCC, 90f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) sub %o0, %o1, GLOBAL_SPARE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) subcc %o2, 4, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) bgu,pt %XCC, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) add %o1, 4, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580)
/* 85: shared return point. The delay slot of retl moves EX_RETVAL(%o3) into %o0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) 85: retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) mov EX_RETVAL(%o3), %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583)
/*
 * Label 90: byte-at-a-time copy of the remaining %o2 bytes, then
 * return. Each pass does %o2 -= 1, loads a byte from %o1, stores it to
 * %o1 + GLOBAL_SPARE, and advances %o1 in the branch delay slot; the
 * loop repeats while %o2 stays above zero. One byte is always copied
 * before the count is tested, so callers must arrive with %o2 >= 1:
 * the jumps to 90 visible in this chunk either test %o2 for zero first
 * or come from the "0 < len <= 16" path.
 * The return mirrors label 85: retl with EX_RETVAL(%o3) moved into %o0
 * in the delay slot. LOAD, STORE, EX_LD, EX_ST and EX_RETVAL are macros
 * defined outside this chunk.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) .align 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) 90:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) subcc %o2, 1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) bgu,pt %XCC, 90b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) mov EX_RETVAL(%o3), %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) .size FUNC_NAME, .-FUNC_NAME