^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * "memset" implementation for SH4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 1999 Niibe Yutaka
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Copyright (c) 2009 STMicroelectronics Limited
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Author: Stuart Menefy <stuart.menefy:st.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * void *memset(void *s, int c, size_t n);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15)
/*
 * memset for SH-4: fill n bytes at s with the low byte of c and return s.
 *
 * Register use, as read from the code below:
 *   r4  in: s. Advanced to s + n straight away; every store pre-decrements
 *       it (or addresses a block just below it), so the buffer is filled
 *       from its end towards its start and r4 is back at s on return.
 *   r5  in: c. Replicated into all four bytes before the word stores.
 *   r6  in: n. Afterwards the number of bytes still to be written.
 *   r0  scratch and loop counter; carries the return value on exit.
 *   r1, r2, r3  scratch, touched only on the "64 bytes or more" path.
 *
 * bt/s, bf/s and rts are delayed branches: the instruction that follows
 * each of them is executed as well, whether or not a conditional branch
 * is taken. Several loops below rely on this to do their store or their
 * length bookkeeping in the delay slot.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) ENTRY(memset)
/*
 * r4 = s + n (one past the last byte). If 12 > n, skip all alignment work
 * and let the byte loop at 40 write everything. cmp/gt is a signed
 * comparison, so a length with the top bit set also takes the byte path.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) mov #12,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) add r6,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) cmp/gt r6,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) bt/s 40f ! if it's too small, set a byte at once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) mov r4,r0
/*
 * r0 = (s + n) & 3: the number of bytes that must be written to bring the
 * end pointer down to a 4-byte boundary. The sub in the delay slot takes
 * those bytes off the remaining length on both paths; it subtracts 0 when
 * the pointer is already aligned.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) and #3,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) cmp/eq #0,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) bt/s 2f ! It's aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) sub r0,r6
/* Write r0 (1..3) bytes; the delay-slot store runs on every pass. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) dt r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) bf/s 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) mov.b r5,@-r4
/*
 * r4 is now 4-byte aligned and r6 >= 9 (at least 12 on entry, at most 3
 * bytes written so far). Build the 32-bit fill pattern: keep only the low
 * byte of c, then copy it into the other three byte lanes of r5.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) 2: ! make VVVV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) extu.b r5,r5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) swap.b r5,r0 ! V0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) or r0,r5 ! VV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) swap.w r5,r0 ! VV00
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) or r0,r5 ! VVVV
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) ! Check if enough bytes need to be copied to be worth the big loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) mov #0x40, r0 ! (MT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) cmp/gt r6,r0 ! (MT) 64 > len => slow loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40)
/*
 * Fewer than 64 bytes left: go straight to the 8-byte loop. The delay
 * slot loads r0 = r6, which is what label 22 expects on entry.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) bt/s 22f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) mov r6,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43)
/*
 * r1 = r4 rounded down to a 32-byte boundary. The delay-slot sub leaves
 * r3 = r4 - r1, the byte distance down to that boundary (0, or a multiple
 * of 4 up to 28 because r4 is already 4-byte aligned).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) ! align the dst to the cache block size if necessary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) mov r4, r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) mov #~(0x1f), r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) and r3, r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) cmp/eq r3, r1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) bt/s 11f ! dst is already aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) sub r1, r3 ! r3-r1 -> r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) shlr2 r3 ! number of loops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
/*
 * Write r3 (1..7) longwords to reach the 32-byte boundary. The delay-slot
 * add takes 4 off the remaining length once per store.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) 10: mov.l r5,@-r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) dt r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) bf/s 10b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) add #-4, r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
/*
 * r2 = r6 >> 5 = number of whole 32-byte blocks (shld with a negative
 * count shifts right). r6 is at least 36 here (64 or more before the
 * alignment loop, which removes at most 28), so r2 >= 1 and the dt-based
 * loop below cannot start from zero.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) 11: ! dst is 32byte aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) mov r6,r2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) mov #-5,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) shld r0,r2 ! number of loops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
/*
 * Point r4 at the base of the block to fill so the eight stores can use
 * displacements 0..28. movca.l takes its data from r0, hence the copy of
 * the pattern into r0.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) add #-32, r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) mov r5, r0
/*
 * One 32-byte block per pass: movca.l writes the first longword, seven
 * mov.l stores write the rest. The length update and the dt are
 * interleaved with the stores; the delay slot steps r4 down to the next
 * block.
 * NOTE(review): movca.l is the SH-4 "move with cache block allocation"
 * store; presumably it is used so the line is allocated without being
 * read from memory first - confirm against the SH-4 manual.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) 12:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) movca.l r0,@r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) mov.l r5,@(4, r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) mov.l r5,@(8, r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) mov.l r5,@(12,r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) mov.l r5,@(16,r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) mov.l r5,@(20,r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) add #-0x20, r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) mov.l r5,@(24,r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) dt r2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) mov.l r5,@(28,r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) bf/s 12b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) add #-32, r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80)
/*
 * The delay slot above also ran on the final pass, so r4 sits 32 bytes
 * below the last block written; undo that. 0..31 bytes remain: with fewer
 * than 8 go to the byte loop, otherwise fall into the 8-byte loop with
 * r0 = r6.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) add #32, r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) mov #8, r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) cmp/ge r0, r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) bf 40f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) mov r6,r0
/*
 * Entry condition for 22: r0 == r6 and r6 >= 8 on both incoming paths, so
 * the iteration count r0 >> 3 is at least 1, as the dt loop requires.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) 22:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) shlr2 r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) shlr r0 ! r0 = r6 >> 3
/* Two longword stores per pass; the second one sits in the delay slot. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) 3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) dt r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) mov.l r5,@-r4 ! set 8-byte at once
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) bf/s 3b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) mov.l r5,@-r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) !
/* r6 was not updated inside the 8-byte loop; keep just the 0..7 leftover. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) mov #7,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) and r0,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98)
/*
 * Byte tail, also the whole job when n < 12. The tst guards the n == 0
 * case, because the dt loop would otherwise wrap r6 instead of stopping.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) ! fill bytes (length may be zero)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 40: tst r6,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) bt 5f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) dt r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) bf/s 4b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) mov.b r5,@-r4
/*
 * All n bytes have been written, so r4 has moved back down to s. The
 * delay slot of rts copies it into r0 as the return value.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 5:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) rts
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) mov r4,r0