^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /* NG4memcpy.S: Niagara-4 optimized memcpy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2012 David S. Miller (davem@davemloft.net)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #ifdef __KERNEL__ /* kernel build: helper macros and ASI names come from the headers below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/visasm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <asm/asi.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #define GLOBAL_SPARE %g7 /* extra integer temporary used by the copy loops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #else /* standalone build: define the same pieces locally */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #define ASI_BLK_INIT_QUAD_LDD_P 0xe2 /* becomes the default STORE_ASI further down */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #define FPRS_FEF 0x04 /* the %fprs bit that FPU_ENTER tests and sets */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) /* On T4 it is very expensive to access ASRs like %fprs and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * %asi, avoiding a read or a write can save ~50 cycles.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * FPU_ENTER keeps the previous %fprs in %o5 and writes %fprs only when FPRS_FEF was clear. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #define FPU_ENTER \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) rd %fprs, %o5; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) andcc %o5, FPRS_FEF, %g0; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) be,a,pn %icc, 999f; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) wr %g0, FPRS_FEF, %fprs; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) 999:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) /* VISExitHalf below relies on %o5 still holding the value saved by FPU_ENTER and writes back only its FPRS_FEF bit. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) #ifdef MEMCPY_DEBUG /* debug entry additionally clears %g1, %g2, %g3, %g5 and sets the condition codes from 0 - 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #define VISEntryHalf FPU_ENTER; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) clr %g1; clr %g2; clr %g3; clr %g5; subcc %g0, %g0, %g0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) #define VISEntryHalf FPU_ENTER
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #define VISExitHalf and %o5, FPRS_FEF, %o5; wr %o5, 0x0, %fprs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) /* NOTE(review): only VISEntryHalf/VISExitHalf are defined in this branch, yet the NON_USER_COPY path uses VISEntryHalfFast/VISExitHalfFast - confirm a standalone includer supplies them. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) #define GLOBAL_SPARE %g5 /* extra integer temporary used by the copy loops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) #endif /* __KERNEL__ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) #ifndef STORE_ASI /* ASI operand of STORE_INIT; an includer may predefine it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) #ifndef SIMULATE_NIAGARA_ON_NON_NIAGARA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) #define STORE_ASI ASI_BLK_INIT_QUAD_LDD_P
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #define STORE_ASI 0x80 /* ASI_P */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) /* When no includer supplied EX_LD or EX_ST, mark this build as NON_USER_COPY. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) #if !defined(EX_LD) && !defined(EX_ST)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) #define NON_USER_COPY /* selects VISEntryHalfFast/VISExitHalfFast in the unaligned large-copy path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) /* Default EX_* wrappers: emit the instruction x unchanged and drop the second argument y. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) #ifndef EX_LD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #define EX_LD(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) #ifndef EX_LD_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #define EX_LD_FP(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) #ifndef EX_ST
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) #define EX_ST(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) #ifndef EX_ST_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) #define EX_ST_FP(x,y) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) /* LOAD, STORE and STORE_INIT wrap every memory access of the routine; each can be predefined by an includer. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) #ifndef LOAD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) #define LOAD(type,addr,dest) type [addr], dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) #ifndef STORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) #ifndef MEMCPY_DEBUG
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) #define STORE(type,src,addr) type src, [addr]
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #else /* debug build: use the alternate-space form of the store, addressed through %asi */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) #define STORE(type,src,addr) type##a src, [addr] %asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) /* STORE_INIT: 8-byte stxa through STORE_ASI; in this file it is used only by the 64-byte block loop at .Llarge_aligned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) #ifndef STORE_INIT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) #define STORE_INIT(src,addr) stxa src, [addr] STORE_ASI
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) /* FUNC_NAME is the exported symbol; PREAMBLE is expanded once, right after the length check at entry. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) #ifndef FUNC_NAME
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) #define FUNC_NAME NG4memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) #ifndef PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) #define PREAMBLE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) /* XCC is the condition-code register name written after the percent sign in the entry trap (tne). */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) #ifndef XCC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) #define XCC xcc
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) /* Tell the assembler that %g2 and %g3 are used as scratch registers here. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) .register %g2,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) .register %g3,#scratch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) #ifndef EX_RETVAL /* EX_RETVAL wraps the register moved into %o0 at .Lexit; identity unless predefined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) #define EX_RETVAL(x) x
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) .align 64 /* place the routine entry on a 64-byte boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) .globl FUNC_NAME /* FUNC_NAME(dst, src, len): copy len bytes from src to dst in ascending address order */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) .type FUNC_NAME,#function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) FUNC_NAME: /* %o0=dst, %o1=src, %o2=len; returns EX_RETVAL(original dst) in %o0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) #ifdef MEMCPY_DEBUG /* the debug STORE() goes through %asi, so load ASI 0x80 first */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) wr %g0, 0x80, %asi
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) srlx %o2, 31, %g2 /* %g2 = len >> 31 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) cmp %g2, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) tne %XCC, 5 /* software trap 5 when len has any bit set at or above bit 31 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) PREAMBLE /* includer hook; expands to nothing unless predefined */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) mov %o0, %o3 /* keep the original dst; %o0 is advanced by the copy loops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) brz,pn %o2, .Lexit /* len == 0: nothing to copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) cmp %o2, 3 /* delay slot: compare for the next branch */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) ble,pn %icc, .Ltiny /* len <= 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) cmp %o2, 19 /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) ble,pn %icc, .Lsmall /* len <= 19 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) or %o0, %o1, %g2 /* delay slot: %g2 = dst | src, consumed by the alignment tests in .Lsmall and .Lmedium */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) cmp %o2, 128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) bl,pn %icc, .Lmedium /* len < 128 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) /* Dispatch above: 0 -> .Lexit, 1..3 -> .Ltiny, 4..19 -> .Lsmall, 20..127 -> .Lmedium, otherwise fall into .Llarge. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) .Llarge:/* len >= 0x80 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) /* First get dest 8 byte aligned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) sub %g0, %o0, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) and %g1, 0x7, %g1 /* %g1 = (-dst) & 7 = bytes up to the next 8-byte boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) brz,pt %g1, 51f /* dst already 8-byte aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) sub %o2, %g1, %o2 /* delay slot: len -= %g1 (no change when %g1 is 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) /* Byte loop: %g1 counts down to 0; %o2 already excludes these bytes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) subcc %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) add %o0, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) /* delay slot: store the byte; %o0 was already advanced */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) /* Issue prefetches for src + 0x40 through src + 0x200 in 0x40 steps. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) LOAD(prefetch, %o1 + 0x080, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) LOAD(prefetch, %o1 + 0x0c0, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) LOAD(prefetch, %o1 + 0x100, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) LOAD(prefetch, %o1 + 0x140, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) LOAD(prefetch, %o1 + 0x180, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) LOAD(prefetch, %o1 + 0x1c0, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) LOAD(prefetch, %o1 + 0x200, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) /* Check if we can use the straight fully aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) * loop, or we require the alignaddr/faligndata variant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) andcc %o1, 0x7, %o5 /* src offset within its 8-byte word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) bne,pn %icc, .Llarge_src_unaligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) sub %g0, %o0, %g1 /* delay slot: %g1 = -dst for the 64-byte alignment step below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) /* Legitimize the use of initializing stores by getting dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) * to be 64-byte aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) and %g1, 0x3f, %g1 /* bytes up to the next 64-byte boundary of dst */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) brz,pt %g1, .Llarge_aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) sub %o2, %g1, %o2 /* delay slot: len -= %g1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) /* Copy 8 bytes per iteration until %g1 reaches 0. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) add %o1, 8, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) subcc %g1, 8, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) add %o0, 8, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) EX_ST(STORE(stx, %g2, %o0 - 0x08), memcpy_retl_o2_plus_g1_plus_8) /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) .Llarge_aligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) andn %o2, 0x3f, %o4 /* %o4 = bytes to move as whole 64-byte blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) sub %o2, %o4, %o2 /* %o2 = tail left over after the block loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) /* One 64-byte block per iteration: eight ldx interleaved with eight STORE_INIT. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) add %o1, 0x40, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) EX_LD(LOAD(ldx, %o1 - 0x38, %g2), memcpy_retl_o2_plus_o4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) subcc %o4, 0x40, %o4 /* sets the condition codes tested by the bne that closes the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) EX_LD(LOAD(ldx, %o1 - 0x30, %g3), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) EX_LD(LOAD(ldx, %o1 - 0x20, %o5), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) EX_ST(STORE_INIT(%g1, %o0), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) EX_LD(LOAD(ldx, %o1 - 0x18, %g2), memcpy_retl_o2_plus_o4_plus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) EX_LD(LOAD(ldx, %o1 - 0x10, %g3), memcpy_retl_o2_plus_o4_plus_40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), memcpy_retl_o2_plus_o4_plus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) EX_ST(STORE_INIT(%o5, %o0), memcpy_retl_o2_plus_o4_plus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) EX_ST(STORE_INIT(%g2, %o0), memcpy_retl_o2_plus_o4_plus_24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) EX_ST(STORE_INIT(%g3, %o0), memcpy_retl_o2_plus_o4_plus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), memcpy_retl_o2_plus_o4_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) LOAD(prefetch, %o1 + 0x200, #n_reads_strong) /* delay slot: keep prefetching 0x200 ahead of src */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) membar #StoreLoad | #StoreStore /* barrier after the STORE_INIT stores, before the tail is handled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) brz,pn %o2, .Lexit /* no tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) cmp %o2, 19 /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) ble,pn %icc, .Lsmall_unaligned /* tail of 1..19 bytes: byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) ba,a,pt %icc, .Lmedium_noprefetch /* annulled branch: no delay-slot instruction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) .Lexit: retl
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) mov EX_RETVAL(%o3), %o0 /* delay slot: return value built from the saved dst */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) .Llarge_src_unaligned: /* dst is 8-byte aligned, src is not: realign the data through FP registers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) #ifdef NON_USER_COPY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) VISEntryHalfFast(.Lmedium_vis_entry_fail) /* defined outside this file; presumably branches to the label when the FPU cannot be used - confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) VISEntryHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) andn %o2, 0x3f, %o4 /* %o4 = bytes to move as whole 64-byte blocks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) sub %o2, %o4, %o2 /* %o2 = tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) alignaddr %o1, %g0, %g1 /* %g1 = src rounded down to 8 bytes; also sets the byte offset used by faligndata */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) add %o1, %o4, %o1 /* %o1 now points at the tail; the loop reads through %g1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), memcpy_retl_o2_plus_o4) /* preload the first doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), memcpy_retl_o2_plus_o4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) subcc %o4, 0x40, %o4 /* sets the condition codes tested by the bne that closes the loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) faligndata %f0, %f2, %f16 /* each faligndata merges two neighbouring source doublewords into one output doubleword */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), memcpy_retl_o2_plus_o4_plus_64) /* first doubleword of the next block, also needed for %f30 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) faligndata %f2, %f4, %f18
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) add %g1, 0x40, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) faligndata %f4, %f6, %f20
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) faligndata %f6, %f8, %f22
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) faligndata %f8, %f10, %f24
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) faligndata %f10, %f12, %f26
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) faligndata %f12, %f14, %f28
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) faligndata %f14, %f0, %f30
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) EX_ST_FP(STORE(std, %f16, %o0 + 0x00), memcpy_retl_o2_plus_o4_plus_64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) EX_ST_FP(STORE(std, %f18, %o0 + 0x08), memcpy_retl_o2_plus_o4_plus_56)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) EX_ST_FP(STORE(std, %f20, %o0 + 0x10), memcpy_retl_o2_plus_o4_plus_48)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) EX_ST_FP(STORE(std, %f22, %o0 + 0x18), memcpy_retl_o2_plus_o4_plus_40)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) EX_ST_FP(STORE(std, %f24, %o0 + 0x20), memcpy_retl_o2_plus_o4_plus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) EX_ST_FP(STORE(std, %f26, %o0 + 0x28), memcpy_retl_o2_plus_o4_plus_24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) EX_ST_FP(STORE(std, %f28, %o0 + 0x30), memcpy_retl_o2_plus_o4_plus_16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) EX_ST_FP(STORE(std, %f30, %o0 + 0x38), memcpy_retl_o2_plus_o4_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) add %o0, 0x40, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) LOAD(prefetch, %g1 + 0x200, #n_reads_strong) /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) #ifdef NON_USER_COPY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) VISExitHalfFast
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) VISExitHalf
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) brz,pn %o2, .Lexit /* no tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) cmp %o2, 19 /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) ble,pn %icc, .Lsmall_unaligned /* tail of 1..19 bytes: byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) ba,a,pt %icc, .Lmedium_unaligned /* annulled branch: no delay-slot instruction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) #ifdef NON_USER_COPY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) .Lmedium_vis_entry_fail: /* target passed to VISEntryHalfFast above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) or %o0, %o1, %g2 /* rebuild dst | src: %g2 was reused as a data register by the alignment loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) .Lmedium: /* entered with %g2 = dst | src */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) LOAD(prefetch, %o1 + 0x40, #n_reads_strong)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) andcc %g2, 0x7, %g0 /* are both pointers 8-byte aligned? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) bne,pn %icc, .Lmedium_unaligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) .Lmedium_noprefetch: /* src and dst are 8-byte aligned on every path that reaches this label */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) andncc %o2, 0x20 - 1, %o5 /* %o5 = bytes to move in 32-byte chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) be,pn %icc, 2f /* fewer than 32 bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) sub %o2, %o5, %o2 /* delay slot: %o2 = what remains after the 32-byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) EX_LD(LOAD(ldx, %o1 + 0x08, %g2), memcpy_retl_o2_plus_o5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), memcpy_retl_o2_plus_o5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) EX_LD(LOAD(ldx, %o1 + 0x18, %o4), memcpy_retl_o2_plus_o5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) add %o1, 0x20, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) subcc %o5, 0x20, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) EX_ST(STORE(stx, %g1, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) EX_ST(STORE(stx, %g2, %o0 + 0x08), memcpy_retl_o2_plus_o5_plus_24)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), memcpy_retl_o2_plus_o5_plus_24) /* NOTE(review): 16 bytes of this chunk are unstored here, so the label looks like it should end in _plus_16 - confirm a handler exists */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) EX_ST(STORE(stx, %o4, %o0 + 0x18), memcpy_retl_o2_plus_o5_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) add %o0, 0x20, %o0 /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) 2: andcc %o2, 0x18, %o5 /* %o5 = remaining whole 8-byte words, as a byte count (0, 8, 16 or 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) be,pt %icc, 3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) sub %o2, %o5, %o2 /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) 1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) add %o1, 0x08, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) subcc %o5, 0x08, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) EX_ST(STORE(stx, %g1, %o0 - 0x08), memcpy_retl_o2_plus_o5_plus_8) /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) 3: brz,pt %o2, .Lexit /* fewer than 8 bytes are left at this point */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) cmp %o2, 0x04 /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) bl,pn %icc, .Ltiny /* 1..3 bytes left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2) /* 4..7 bytes left: move one 32-bit word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) add %o1, 0x04, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) add %o0, 0x04, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) subcc %o2, 0x04, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) bne,pn %icc, .Ltiny /* 1..3 bytes still left */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_4) /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) ba,a,pt %icc, .Lexit /* annulled branch: no delay-slot instruction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) .Lmedium_unaligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) /* First get dest 8 byte aligned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) sub %g0, %o0, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) and %g1, 0x7, %g1 /* %g1 = (-dst) & 7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) brz,pt %g1, 2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) sub %o2, %g1, %o2 /* delay slot: len -= %g1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), memcpy_retl_o2_plus_g1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) subcc %g1, 1, %g1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) add %o0, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) EX_ST(STORE(stb, %g2, %o0 - 0x01), memcpy_retl_o2_plus_g1_plus_1) /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) and %o1, 0x7, %g1 /* src byte offset inside its 8-byte word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) brz,pn %g1, .Lmedium_noprefetch /* src turned out aligned as well: use the aligned loops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) sll %g1, 3, %g1 /* delay slot: offset in bits */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) mov 64, %g2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) sub %g2, %g1, %g2 /* %g2 = 64 - bit offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) andn %o1, 0x7, %o1 /* read src through aligned 8-byte loads from here on */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) EX_LD(LOAD(ldx, %o1 + 0x00, %o4), memcpy_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) sllx %o4, %g1, %o4 /* shift out the bytes that lie in front of src (big-endian load) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) andn %o2, 0x08 - 1, %o5 /* %o5 = bytes to move as whole 8-byte words */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) sub %o2, %o5, %o2 /* %o2 = tail */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) 1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), memcpy_retl_o2_plus_o5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) add %o1, 0x08, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) subcc %o5, 0x08, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) srlx %g3, %g2, GLOBAL_SPARE /* leading part of the new word, moved down to the low end */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) or GLOBAL_SPARE, %o4, GLOBAL_SPARE /* combine with the part carried over in %o4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), memcpy_retl_o2_plus_o5_plus_8)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) add %o0, 0x08, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) sllx %g3, %g1, %o4 /* delay slot: carry the rest of this word into the next output word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) srl %g1, 3, %g1 /* back from bits to bytes */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) add %o1, %g1, %o1 /* %o1 = the real (unaligned) src position again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) brz,pn %o2, .Lexit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) ba,a,pt %icc, .Lsmall_unaligned /* annulled: no delay-slot instruction follows, so the next instruction in the file must not run */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) .Ltiny: /* copy 1..3 bytes at fixed offsets 0, 1, 2; %o0 and %o1 are not advanced */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) subcc %o2, 1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) be,pn %icc, .Lexit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) EX_ST(STORE(stb, %g1, %o0 + 0x00), memcpy_retl_o2_plus_1) /* delay slot: executed whether or not the branch is taken */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) EX_LD(LOAD(ldub, %o1 + 0x01, %g1), memcpy_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) subcc %o2, 1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) be,pn %icc, .Lexit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) EX_ST(STORE(stb, %g1, %o0 + 0x01), memcpy_retl_o2_plus_1) /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) EX_LD(LOAD(ldub, %o1 + 0x02, %g1), memcpy_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) ba,pt %icc, .Lexit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) EX_ST(STORE(stb, %g1, %o0 + 0x02), memcpy_retl_o2) /* delay slot: third and last byte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) .Lsmall: /* len 4..19, entered with %g2 = dst | src */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) andcc %g2, 0x3, %g0 /* are both pointers 4-byte aligned? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) bne,pn %icc, .Lsmall_unaligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) andn %o2, 0x4 - 1, %o5 /* delay slot: %o5 = bytes to move as whole 32-bit words */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) sub %o2, %o5, %o2 /* %o2 = 0..3 bytes left for .Ltiny */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) EX_LD(LOAD(lduw, %o1 + 0x00, %g1), memcpy_retl_o2_plus_o5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) add %o1, 0x04, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) subcc %o5, 0x04, %o5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) add %o0, 0x04, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) EX_ST(STORE(stw, %g1, %o0 - 0x04), memcpy_retl_o2_plus_o5_plus_4) /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) brz,pt %o2, .Lexit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) ba,a,pt %icc, .Ltiny /* annulled branch: no delay-slot instruction */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) .Lsmall_unaligned: /* plain byte loop; every branch to this label is taken with %o2 != 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) 1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), memcpy_retl_o2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) add %o1, 1, %o1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) add %o0, 1, %o0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) subcc %o2, 1, %o2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) bne,pt %icc, 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) EX_ST(STORE(stb, %g1, %o0 - 0x01), memcpy_retl_o2_plus_1) /* delay slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) ba,a,pt %icc, .Lexit /* annulled branch: the nop below is never executed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) .size FUNC_NAME, .-FUNC_NAME