/*
 * arch/xtensa/lib/hal/memcopy.S -- Core HAL library functions
 * xthal_memcpy and xthal_bcopy
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License. See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 2002 - 2012 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * void *memcpy(void *dst, const void *src, size_t len);
 *
 * This function is intended to do the same thing as the standard
 * library function memcpy() for most cases.
 * However, where the source and/or destination references
 * an instruction RAM or ROM or a data RAM or ROM, that
 * source and/or destination will always be accessed with
 * 32-bit load and store instructions (as required for these
 * types of devices).
 *
 * !!!!!!! XTFIXME:
 * !!!!!!! Handling of IRAM/IROM has not yet
 * !!!!!!! been implemented.
 *
 * The (general case) algorithm is as follows:
 *   If destination is unaligned, align it by conditionally
 *     copying 1 and 2 bytes.
 *   If source is aligned,
 *     do 16 bytes with a loop, and then finish up with
 *     8, 4, 2, and 1 byte copies conditional on the length;
 *   else (if source is unaligned),
 *     do the same, but use SRC to align the source data.
 * This code tries to use fall-through branches for the common
 * case of aligned source and destination and a length that is
 * a multiple of 4 (or 8).
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ tmp
 */
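
/*
 * For illustration only: a C-level sketch of the general-case strategy
 * described above (the helper name is hypothetical and not part of this
 * file).  It omits the short-copy cutoffs and the IRAM/IROM question,
 * and its unaligned-source path simply falls back to byte copies where
 * the assembly below uses the SSA8/SRC funnel shift:
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static void *memcpy_sketch(void *dst, const void *src, size_t len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		if (len && ((uintptr_t)d & 1)) {	// align dst to 2
 *			*d++ = *s++; len--;
 *		}
 *		if (len >= 2 && ((uintptr_t)d & 2)) {	// align dst to 4
 *			*d++ = *s++; *d++ = *s++; len -= 2;
 *		}
 *		if (((uintptr_t)s & 3) == 0) {		// word-aligned source
 *			uint32_t *dw = (uint32_t *)(void *)d;
 *			const uint32_t *sw = (const uint32_t *)(const void *)s;
 *
 *			for (; len >= 16; len -= 16) {	// 16 bytes/iteration
 *				dw[0] = sw[0]; dw[1] = sw[1];
 *				dw[2] = sw[2]; dw[3] = sw[3];
 *				dw += 4; sw += 4;
 *			}
 *			d = (unsigned char *)dw;
 *			s = (const unsigned char *)sw;
 *		}
 *		while (len--)		// tail, byte by byte here
 *			*d++ = *s++;	// (the asm uses 8/4/2/1-byte steps)
 *		return dst;
 *	}
 */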

	.text

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytecopydone
	add	a7, a3, a4	# a7 = end address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lnextbyte:
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	s8i	a6, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lnextbyte # continue loop if $a3:src != $a7:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytecopydone:
	abi_ret_default

/*
 * Destination is unaligned
 */

	.align	4
.Ldst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbytecopy	# do short copies byte by byte

	# copy 1 byte
	l8ui	a6, a3, 0
	addi	a3, a3, 1
	addi	a4, a4, -1
	s8i	a6, a5, 0
	addi	a5, a5, 1
	_bbci.l	a5, 1, .Ldstaligned	# if dst is now aligned, then
					# return to main algorithm
.Ldst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	_bltui	a4, 6, .Lbytecopy	# do short copies byte by byte
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a3, a3, 2
	addi	a4, a4, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	addi	a5, a5, 2
	j	.Ldstaligned	# dst is now aligned, return to main algorithm

ENTRY(__memcpy)
WEAK(memcpy)

	abi_entry_default
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
.Lcommon:
	_bbsi.l	a2, 0, .Ldst1mod2	# if dst is 1 mod 2
	_bbsi.l	a2, 1, .Ldst2mod4	# if dst is 2 mod 4
.Ldstaligned:	# return here from .Ldst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lsrcunaligned	# then use shifting copy
	/*
	 * Destination and source are word-aligned, use word copy.
	 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a8, a7, 4
	add	a8, a8, a3	# a8 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1:
	l32i	a6, a3, 0
	l32i	a7, a3, 4
	s32i	a6, a5, 0
	l32i	a6, a3, 8
	s32i	a7, a5, 4
	l32i	a7, a3, 12
	s32i	a6, a5, 8
	addi	a3, a3, 16
	s32i	a7, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .Loop1 # continue loop if a3:src != a8:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
	bbci.l	a4, 3, .L2
	# copy 8 bytes
	l32i	a6, a3, 0
	l32i	a7, a3, 4
	addi	a3, a3, 8
	s32i	a6, a5, 0
	s32i	a7, a5, 4
	addi	a5, a5, 8
.L2:
	bbsi.l	a4, 2, .L3
	bbsi.l	a4, 1, .L4
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L3:
	# copy 4 bytes
	l32i	a6, a3, 0
	addi	a3, a3, 4
	s32i	a6, a5, 0
	addi	a5, a5, 4
	bbsi.l	a4, 1, .L4
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L4:
	# copy 2 bytes
	l16ui	a6, a3, 0
	addi	a3, a3, 2
	s16i	a6, a5, 0
	addi	a5, a5, 2
	bbsi.l	a4, 0, .L5
	abi_ret_default
.L5:
	# copy 1 byte
	l8ui	a6, a3, 0
	s8i	a6, a5, 0
	abi_ret_default

/*
 * Destination is aligned, Source is unaligned
 */

	.align	4
.Lsrcunaligned:
	_beqz	a4, .Ldone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	__ssa8	a3		# set shift amount from byte offset
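
	/*
	 * From here on, each aligned 32-bit store is assembled from two
	 * neighbouring source words with the SRC funnel shift; SSA8 has
	 * just set the shift amount from the low two bits of the source
	 * address (the __ssa8/__src_b macros hide the endianness details).
	 * Conceptually, for a little-endian core with misalignment
	 * ofs = src & 3 (ofs is 1..3 on this path; illustrative C only,
	 * not part of this file):
	 *
	 *	out[i] = (w[i] >> (8 * ofs)) | (w[i + 1] << (32 - 8 * ofs));
	 *
	 * where w[] are the aligned words covering the unaligned source.
	 */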

/* set to 1 when running on ISS (simulator) with the
   lint or ferret client, or 0 to save a few cycles */
#define SIM_CHECKS_ALIGNMENT	1
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
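	/*
	 * Aligning a3 down here means every l32i below hits an aligned
	 * address, so nothing faults on cores that take unaligned-load
	 * exceptions (or on ISS with alignment checking); the offset
	 * saved in a11 is added back once the word loop is done (.L13).
	 */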
	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop2done
	slli	a10, a7, 4
	add	a10, a10, a3	# a10 = end of last 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2:
	l32i	a7, a3, 4
	l32i	a8, a3, 8
	__src_b	a6, a6, a7
	s32i	a6, a5, 0
	l32i	a9, a3, 12
	__src_b	a7, a7, a8
	s32i	a7, a5, 4
	l32i	a6, a3, 16
	__src_b	a8, a8, a9
	s32i	a8, a5, 8
	addi	a3, a3, 16
	__src_b	a9, a9, a6
	s32i	a9, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .Loop2 # continue loop if a3:src != a10:src_end
#endif /* !XCHAL_HAVE_LOOPS */
.Loop2done:
	bbci.l	a4, 3, .L12
	# copy 8 bytes
	l32i	a7, a3, 4
	l32i	a8, a3, 8
	__src_b	a6, a6, a7
	s32i	a6, a5, 0
	addi	a3, a3, 8
	__src_b	a7, a7, a8
	s32i	a7, a5, 4
	addi	a5, a5, 8
	mov	a6, a8
.L12:
	bbci.l	a4, 2, .L13
	# copy 4 bytes
	l32i	a7, a3, 4
	addi	a3, a3, 4
	__src_b	a6, a6, a7
	s32i	a6, a5, 0
	addi	a5, a5, 4
	mov	a6, a7
.L13:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	bbsi.l	a4, 1, .L14
	bbsi.l	a4, 0, .L15
.Ldone:	abi_ret_default
.L14:
	# copy 2 bytes
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a3, a3, 2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	addi	a5, a5, 2
	bbsi.l	a4, 0, .L15
	abi_ret_default
.L15:
	# copy 1 byte
	l8ui	a6, a3, 0
	s8i	a6, a5, 0
	abi_ret_default

ENDPROC(__memcpy)

/*
 * void bcopy(const void *src, void *dest, size_t n);
 */

ENTRY(bcopy)

	abi_entry_default
	# a2=src, a3=dst, a4=len
	mov	a5, a3
	mov	a3, a2
	mov	a2, a5
	j	.Lmovecommon	# go to common code for memmove+bcopy

ENDPROC(bcopy)
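
/*
 * Equivalent C, for illustration only:
 *
 *	void bcopy(const void *src, void *dest, size_t n)
 *	{
 *		memmove(dest, src, n);
 *	}
 *
 * The three register moves above only swap the incoming arguments so
 * that a2/a5 hold dest and a3 holds src, which is the layout the shared
 * memmove path (.Lmovecommon) expects.
 */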

/*
 * void *memmove(void *dst, const void *src, size_t len);
 *
 * This function is intended to do the same thing as the standard
 * library function memmove() for most cases.
 * However, where the source and/or destination references
 * an instruction RAM or ROM or a data RAM or ROM, that
 * source and/or destination will always be accessed with
 * 32-bit load and store instructions (as required for these
 * types of devices).
 *
 * !!!!!!! XTFIXME:
 * !!!!!!! Handling of IRAM/IROM has not yet
 * !!!!!!! been implemented.
 *
 * The (general case) algorithm is as follows:
 *   If end of source doesn't overlap destination then use memcpy.
 *   Otherwise do memcpy backwards.
 *
 * Register use:
 *	a0/ return address
 *	a1/ stack pointer
 *	a2/ return value
 *	a3/ src
 *	a4/ length
 *	a5/ dst
 *	a6/ tmp
 *	a7/ tmp
 *	a8/ tmp
 *	a9/ tmp
 *	a10/ tmp
 *	a11/ tmp
 */
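
/*
 * For illustration only: a C-level sketch of that dispatch (hypothetical
 * helper name, not part of this file).  A single unsigned comparison is
 * enough: when the unsigned difference (dst - src) is at least len, a
 * forward copy can never overwrite source bytes it has not yet read, so
 * the memcpy path is safe; otherwise the regions overlap destructively
 * and the copy runs backwards:
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void *memmove_sketch(void *dst, const void *src, size_t len)
 *	{
 *		unsigned char *d = dst;
 *		const unsigned char *s = src;
 *
 *		if ((uintptr_t)d - (uintptr_t)s >= len)
 *			return memcpy(d, s, len);
 *		while (len--)
 *			d[len] = s[len];
 *		return dst;
 *	}
 */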

/*
 * Byte by byte copy
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbackbytecopy:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbackbytecopydone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbackbytecopydone
	sub	a7, a3, a4	# a7 = start address for source
#endif /* !XCHAL_HAVE_LOOPS */
.Lbacknextbyte:
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a7, .Lbacknextbyte # continue loop if
				       # $a3:src != $a7:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.Lbackbytecopydone:
	abi_ret_default

/*
 * Destination is unaligned
 */

	.align	4
.Lbackdst1mod2:	# dst is only byte aligned
	_bltui	a4, 7, .Lbackbytecopy	# do short copies byte by byte

	# copy 1 byte
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
	addi	a4, a4, -1
	_bbci.l	a5, 1, .Lbackdstaligned	# if dst is now aligned, then
					# return to main algorithm
.Lbackdst2mod4:	# dst 16-bit aligned
	# copy 2 bytes
	_bltui	a4, 6, .Lbackbytecopy	# do short copies byte by byte
	addi	a3, a3, -2
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a5, a5, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	addi	a4, a4, -2
	j	.Lbackdstaligned	# dst is now aligned,
					# return to main algorithm

ENTRY(__memmove)
WEAK(memmove)

	abi_entry_default
	# a2/ dst, a3/ src, a4/ len
	mov	a5, a2		# copy dst so that a2 is return value
.Lmovecommon:
	sub	a6, a5, a3	# a6 = dst - src (unsigned, mod 2^32)
	bgeu	a6, a4, .Lcommon	# if a6 >= len the regions don't
					# overlap destructively: take the
					# forward (memcpy) path, else copy
					# backwards below

	add	a5, a5, a4
	add	a3, a3, a4

	_bbsi.l	a5, 0, .Lbackdst1mod2	# if dst is 1 mod 2
	_bbsi.l	a5, 1, .Lbackdst2mod4	# if dst is 2 mod 4
.Lbackdstaligned:	# return here from .Lbackdst?mod? once dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	movi	a8, 3		# if source is not aligned,
	_bany	a3, a8, .Lbacksrcunaligned	# then use shifting copy
	/*
	 * Destination and source are word-aligned, use word copy.
	 */
	# copy 16 bytes per iteration for word-aligned dst and word-aligned src
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .backLoop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .backLoop1done
	slli	a8, a7, 4
	sub	a8, a3, a8	# a8 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.backLoop1:
	addi	a3, a3, -16
	l32i	a7, a3, 12
	l32i	a6, a3, 8
	addi	a5, a5, -16
	s32i	a7, a5, 12
	l32i	a7, a3, 4
	s32i	a6, a5, 8
	l32i	a6, a3, 0
	s32i	a7, a5, 4
	s32i	a6, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a8, .backLoop1 # continue loop if a3:src != a8:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.backLoop1done:
	bbci.l	a4, 3, .Lback2
	# copy 8 bytes
	addi	a3, a3, -8
	l32i	a6, a3, 0
	l32i	a7, a3, 4
	addi	a5, a5, -8
	s32i	a6, a5, 0
	s32i	a7, a5, 4
.Lback2:
	bbsi.l	a4, 2, .Lback3
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback3:
	# copy 4 bytes
	addi	a3, a3, -4
	l32i	a6, a3, 0
	addi	a5, a5, -4
	s32i	a6, a5, 0
	bbsi.l	a4, 1, .Lback4
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback4:
	# copy 2 bytes
	addi	a3, a3, -2
	l16ui	a6, a3, 0
	addi	a5, a5, -2
	s16i	a6, a5, 0
	bbsi.l	a4, 0, .Lback5
	abi_ret_default
.Lback5:
	# copy 1 byte
	addi	a3, a3, -1
	l8ui	a6, a3, 0
	addi	a5, a5, -1
	s8i	a6, a5, 0
	abi_ret_default

/*
 * Destination is aligned, Source is unaligned
 */

	.align	4
.Lbacksrcunaligned:
	_beqz	a4, .Lbackdone	# avoid loading anything for zero-length copies
	# copy 16 bytes per iteration for word-aligned dst and unaligned src
	__ssa8	a3		# set shift amount from byte offset
#define SIM_CHECKS_ALIGNMENT	1	/* set to 1 when running on ISS with
					 * the lint or ferret client, or 0
					 * to save a few cycles */
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	and	a11, a3, a8	# save unalignment offset for below
	sub	a3, a3, a11	# align a3
#endif
	l32i	a6, a3, 0	# load first word
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .backLoop2done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .backLoop2done
	slli	a10, a7, 4
	sub	a10, a3, a10	# a10 = start of first 16B source chunk
#endif /* !XCHAL_HAVE_LOOPS */
.backLoop2:
	addi	a3, a3, -16
	l32i	a7, a3, 12
	l32i	a8, a3, 8
	addi	a5, a5, -16
	__src_b	a6, a7, a6
	s32i	a6, a5, 12
	l32i	a9, a3, 4
	__src_b	a7, a8, a7
	s32i	a7, a5, 8
	l32i	a6, a3, 0
	__src_b	a8, a9, a8
	s32i	a8, a5, 4
	__src_b	a9, a6, a9
	s32i	a9, a5, 0
#if !XCHAL_HAVE_LOOPS
	bne	a3, a10, .backLoop2 # continue loop if a3:src != a10:src_start
#endif /* !XCHAL_HAVE_LOOPS */
.backLoop2done:
	bbci.l	a4, 3, .Lback12
	# copy 8 bytes
	addi	a3, a3, -8
	l32i	a7, a3, 4
	l32i	a8, a3, 0
	addi	a5, a5, -8
	__src_b	a6, a7, a6
	s32i	a6, a5, 4
	__src_b	a7, a8, a7
	s32i	a7, a5, 0
	mov	a6, a8
.Lback12:
	bbci.l	a4, 2, .Lback13
	# copy 4 bytes
	addi	a3, a3, -4
	l32i	a7, a3, 0
	addi	a5, a5, -4
	__src_b	a6, a7, a6
	s32i	a6, a5, 0
	mov	a6, a7
.Lback13:
#if XCHAL_UNALIGNED_LOAD_EXCEPTION || SIM_CHECKS_ALIGNMENT
	add	a3, a3, a11	# readjust a3 with correct misalignment
#endif
	bbsi.l	a4, 1, .Lback14
	bbsi.l	a4, 0, .Lback15
.Lbackdone:
	abi_ret_default
.Lback14:
	# copy 2 bytes
	addi	a3, a3, -2
	l8ui	a6, a3, 0
	l8ui	a7, a3, 1
	addi	a5, a5, -2
	s8i	a6, a5, 0
	s8i	a7, a5, 1
	bbsi.l	a4, 0, .Lback15
	abi_ret_default
.Lback15:
	# copy 1 byte
	addi	a3, a3, -1
	addi	a5, a5, -1
	l8ui	a6, a3, 0
	s8i	a6, a5, 0
	abi_ret_default

ENDPROC(__memmove)