/*
 * arch/xtensa/lib/memset.S
 *
 * ANSI C standard library function memset
 * (Well, almost. .fixup code might return zero.)
 *
 * This file is subject to the terms and conditions of the GNU General
 * Public License. See the file "COPYING" in the main directory of
 * this archive for more details.
 *
 * Copyright (C) 2002 Tensilica Inc.
 */

#include <linux/linkage.h>
#include <asm/asmmacro.h>
#include <asm/core.h>

/*
 * void *memset(void *dst, int c, size_t length)
 *
 * The algorithm is as follows:
 *   Create a word with c in all byte positions.
 *   If the destination is aligned,
 *     do 16B chunks with a loop, and then finish up with
 *     8B, 4B, 2B, and 1B stores conditional on the length.
 *   If the destination is unaligned, align it by conditionally
 *     setting 1B and 2B and then go to the aligned case.
 *   This code tries to use fall-through branches for the common
 *     case of an aligned destination (except for the branches to
 *     the alignment labels).
 */
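/*
 * For reference, a rough C equivalent of the algorithm above. This is
 * an illustrative sketch only, not the kernel implementation: it
 * assumes a 32-bit unsigned int (as on xtensa) and omits both the
 * EX()/.fixup fault handling and the exact store scheduling of the
 * assembly below.
 *
 *	void *memset(void *dst, int c, size_t n)
 *	{
 *		unsigned char *p = dst;
 *		unsigned int w = (unsigned char)c * 0x01010101u;
 *
 *		while (n && ((unsigned long)p & 3)) {	// align dst
 *			*p++ = (unsigned char)c;
 *			n--;
 *		}
 *		while (n >= 16) {			// 16B chunks
 *			((unsigned int *)p)[0] = w;
 *			((unsigned int *)p)[1] = w;
 *			((unsigned int *)p)[2] = w;
 *			((unsigned int *)p)[3] = w;
 *			p += 16;
 *			n -= 16;
 *		}
 *		while (n--)				// 1B..15B tail
 *			*p++ = (unsigned char)c;
 *		return dst;
 *	}
 */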

.text
ENTRY(__memset)
WEAK(memset)

	abi_entry_default
	# a2/ dst, a3/ c, a4/ length
	extui	a3, a3, 0, 8	# mask to just 8 bits
	slli	a7, a3, 8	# duplicate character in all bytes of word
	or	a3, a3, a7	# ...
	slli	a7, a3, 16	# ...
	or	a3, a3, a7	# ...
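	# e.g. with c = 0xAB: a3 = 0x000000AB -> 0x0000ABAB -> 0xABABABAB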
	mov	a5, a2		# copy dst so that a2 is return value
	movi	a6, 3		# for alignment tests
	bany	a2, a6, .Ldstunaligned # if dst is unaligned
.L0:	# return here from .Ldstunaligned when dst is aligned
	srli	a7, a4, 4	# number of loop iterations with 16B
				# per iteration
	bnez	a4, .Laligned
	abi_ret_default

/*
 * Destination is word-aligned.
 */
	# set 16 bytes per iteration for word-aligned dst
	.align	4		# 1 mod 4 alignment for LOOPNEZ
	.byte	0		# (0 mod 4 alignment for LBEG)
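	# (loopnez is a 3-byte instruction, so starting it at 1 mod 4
	# makes the first loop-body instruction, i.e. LBEG, start at
	# 0 mod 4, the alignment the zero-overhead loop hardware favors)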
.Laligned:
#if XCHAL_HAVE_LOOPS
	loopnez	a7, .Loop1done
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a7, .Loop1done
	slli	a6, a7, 4
	add	a6, a6, a5	# a6 = end of last 16B chunk
#endif /* !XCHAL_HAVE_LOOPS */
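	# each store below is tagged EX(10f): the macro records an
	# exception table entry so that a faulting store branches to
	# the fixup code at label 10 instead of raising a fatal fault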
.Loop1:
EX(10f) s32i	a3, a5, 0
EX(10f) s32i	a3, a5, 4
EX(10f) s32i	a3, a5, 8
EX(10f) s32i	a3, a5, 12
	addi	a5, a5, 16
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Loop1
#endif /* !XCHAL_HAVE_LOOPS */
.Loop1done:
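	# at most 15 bytes remain; bits 3..0 of the length select the
	# conditional 8B, 4B, 2B, and 1B tail stores below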
	bbci.l	a4, 3, .L2
	# set 8 bytes
EX(10f) s32i	a3, a5, 0
EX(10f) s32i	a3, a5, 4
	addi	a5, a5, 8
.L2:
	bbci.l	a4, 2, .L3
	# set 4 bytes
EX(10f) s32i	a3, a5, 0
	addi	a5, a5, 4
.L3:
	bbci.l	a4, 1, .L4
	# set 2 bytes
EX(10f) s16i	a3, a5, 0
	addi	a5, a5, 2
.L4:
	bbci.l	a4, 0, .L5
	# set 1 byte
EX(10f) s8i	a3, a5, 0
.L5:
.Lret1:
	abi_ret_default

/*
 * Destination is unaligned
 */

.Ldstunaligned:
	bltui	a4, 8, .Lbyteset	# set short lengths byte by byte
	bbci.l	a5, 0, .L20		# branch if dst is already half-word aligned
	# dst is only byte aligned
	# set 1 byte
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
	addi	a4, a4, -1
	# now retest if dst aligned
	bbci.l	a5, 1, .L0	# if now aligned, return to main algorithm
.L20:
	# dst half-aligned
	# set 2 bytes
EX(10f) s16i	a3, a5, 0
	addi	a5, a5, 2
	addi	a4, a4, -2
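	# length was >= 8 on entry to .Ldstunaligned and at most 3
	# bytes were consumed by the alignment stores above, so at
	# least 5 bytes remain for the aligned path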
	j	.L0		# dst is now aligned, return to main algorithm

/*
 * Byte by byte set
 */
	.align	4
	.byte	0		# 1 mod 4 alignment for LOOPNEZ
				# (0 mod 4 alignment for LBEG)
.Lbyteset:
#if XCHAL_HAVE_LOOPS
	loopnez	a4, .Lbytesetdone
#else /* !XCHAL_HAVE_LOOPS */
	beqz	a4, .Lbytesetdone
	add	a6, a5, a4	# a6 = ending address
#endif /* !XCHAL_HAVE_LOOPS */
.Lbyteloop:
EX(10f) s8i	a3, a5, 0
	addi	a5, a5, 1
#if !XCHAL_HAVE_LOOPS
	blt	a5, a6, .Lbyteloop
#endif /* !XCHAL_HAVE_LOOPS */
.Lbytesetdone:
	abi_ret_default

ENDPROC(__memset)

	.section .fixup, "ax"
	.align	4

/* We return zero if a failure occurred. */

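	# a store that faults branches here via its EX(10f) exception
	# table entry; clearing a2 makes the function return NULL so
	# the caller can detect the failure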
10:
	movi	a2, 0
	abi_ret_default