^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) /* If dst and src are 4 byte aligned, copy 8 bytes at a time.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) If the src is 4, but not 8 byte aligned, we first read 4 bytes to get
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) it 8 byte aligned. Thus, we can do a little read-ahead, without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) dereferencing a cache line that we should not touch.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) Note that short and long instructions have been scheduled to avoid
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) branch stalls.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) The bne_s to r3z could be made unaligned & long to avoid a stall
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) there, but it is not likely to be taken often, and it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) would also be likely to cost an unaligned mispredict at the next call. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17)
/*
 * strcpy: copy the zero-terminated string at r1 to the buffer at r0.
 * NOTE(review): "r0 = dst, r1 = src, return through blink" is assumed from
 * the usual ARC calling convention; nothing in this file states it.
 *
 * r0 is never written below, so it still holds the original dst on return.
 *
 * Register roles:
 *   r10    running destination pointer (copy of r0)
 *   r1     running source pointer; reused as a one-byte scratch at r3z
 *   r3,r4  the two source words in flight
 *   r8     0x01010101
 *   r12    r8 rotated right by one bit (0x80808080)
 *   r2     scratch for the alignment test and the zero-byte test
 *
 * Zero-byte test used throughout: ((w - r8) & ~w) & r12 is non-zero when
 * some byte of w is 0.
 *
 * Paths:
 *   - dst or src not 4-byte aligned: byte-at-a-time copy in charloop.
 *   - otherwise: word loop handling two words per iteration, then the
 *     word that contains the terminator is stored byte by byte at r3z.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) ENTRY_CFI(strcpy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) or r2,r0,r1 /* r2 = dst | src, to test both alignments at once */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) bmsk_s r2,r2,1 /* keep bits 1:0; non-zero if either pointer is not 4-byte aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) brne.d r2,0,charloop /* misaligned: copy byte by byte (.d: next insn is a delay slot) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) mov_s r10,r0 /* delay slot, runs on both paths: r10 = working dst, r0 stays untouched */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) ld_s r3,[r1,0] /* r3 = first source word */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) mov r8,0x01010101 /* 0x01 in every byte, for the zero-byte test */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) bbit0.d r1,2,loop_start /* src bit 2 clear (src 8-byte aligned): enter the loop at its second half */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) ror r12,r8 /* delay slot: r12 = r8 rotated right by 1 = 0x80808080 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) sub r2,r3,r8 /* src is 4- but not 8-byte aligned: test the first word on its own */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) bic_s r2,r2,r3 /* r2 = (r3 - r8) & ~r3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) tst_s r2,r12 /* Z clear if r3 contains a zero byte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) bne r3z /* terminator is in the first word: finish byte by byte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) mov_s r4,r3 /* no terminator: move the word to r4, which the loop head stores */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) .balign 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) loop: /* on entry: r4 = word already tested free of zero bytes, not yet stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) ld.a r3,[r1,4] /* r1 += 4, then r3 = word at r1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) st.ab r4,[r10,4] /* store r4 at r10, then r10 += 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) loop_start: /* on entry: r3 = untested word that was loaded from the address in r1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) ld.a r4,[r1,4] /* read ahead before r3 is tested: r1 += 4, r4 = word at r1 (see file header note) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) sub r2,r3,r8 /* zero-byte test on r3 ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) bic_s r2,r2,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) tst_s r2,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) bne_s r3z /* r3 holds the terminator: finish byte by byte (r4 is discarded) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) st.ab r3,[r10,4] /* r3 is clean: store it, r10 += 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) sub r2,r4,r8 /* zero-byte test on r4 ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) bic r2,r2,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) tst r2,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) beq loop /* r4 is clean too: next iteration stores it at the loop head */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) mov_s r3,r4 /* r4 holds the terminator: hand it to the byte tail, which works on r3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) #ifdef __LITTLE_ENDIAN__
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) r3z: bmsk.f r1,r3,7 /* r1 = bits 7:0 of r3 (next byte in memory order); .f sets Z if that byte is 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) lsr_s r3,r3,8 /* move the following byte down into bits 7:0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) #else /* big-endian: bytes come out of the top of the word instead */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) r3z: lsr.f r1,r3,24 /* r1 = bits 31:24 of r3 (next byte in memory order); .f sets Z if that byte is 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) asl_s r3,r3,8 /* move the following byte up into bits 31:24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) bne.d r3z /* uses Z from the .f insn above: repeat until the extracted byte was 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) stb.ab r1,[r10,1] /* delay slot, always runs: store the byte (terminator included), r10 += 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) j_s [blink] /* return to the address held in blink */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) .balign 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) charloop: /* byte-at-a-time path, taken when dst or src is not 4-byte aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) ldb.ab r3,[r1,1] /* r3 = byte at r1, then r1 += 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) brne.d r3,0,charloop /* keep going until the byte just loaded was 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) stb.ab r3,[r10,1] /* delay slot, always runs: store the byte (terminator included), r10 += 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) j [blink] /* return to the address held in blink */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) END_CFI(strcpy)