/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2014-15 Synopsys, Inc. (www.synopsys.com)
 */

#include <linux/linkage.h>
#include <asm/cache.h>

/*
 * The memset implementation below is optimized to use prefetchw and prealloc
 * instructions on CPUs with a 64-byte L1 data cache line (L1_CACHE_SHIFT == 6).
 * If you want an optimized memset for the other possible L1 data cache line
 * lengths (32 and 128 bytes), rewrite the code carefully, making sure no
 * prefetchw/prealloc instruction ever targets an L1 cache line that does not
 * belong to the memset area.
 */
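
/*
 * Rough C-level sketch of the flow implemented below (illustrative only,
 * not compiled code; names chosen to mirror the registers used here):
 *
 *	void *memset(void *s, int c, size_t n)
 *	{
 *		char *d = s;			// r3 cursor; r0 kept as return
 *		while (n && ((long)d & 3))	// .Laligndestination byte loop
 *			*d++ = c, n--;
 *		unsigned w = 0x01010101u * (c & 0xff);	// replicated fill byte
 *		// store w in 64-byte chunks (prealloc'ing the next line),
 *		// then 32-byte chunks, then trailing single bytes
 *		return s;
 *	}
 */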

#if L1_CACHE_SHIFT == 6

.macro PREALLOC_INSTR	reg, off
	prealloc	[\reg, \off]
.endm

.macro PREFETCHW_INSTR	reg, off
	prefetchw	[\reg, \off]
.endm
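
; Note: prealloc allocates a line in the data cache without fetching its
; contents from memory, so it must only target lines this memset will
; completely overwrite; prefetchw fetches the line with intent to write,
; which is also safe for the first, possibly partial, line.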

#else

.macro PREALLOC_INSTR	reg, off
.endm

.macro PREFETCHW_INSTR	reg, off
.endm

#endif

ENTRY_CFI(memset)
	PREFETCHW_INSTR	r0, 0	; Prefetch the first write location
	mov.f	0, r2
;;; if size is zero
	jz.d	[blink]
	mov	r3, r0		; don't clobber ret val
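	; (the mov above sits in the jz.d delay slot, so it executes even on
	;  the early return; from here on r3 is the store cursor while r0 is
	;  preserved as memset's return value)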

;;; if length <= 8
	brls.d.nt	r2, 8, .Lsmallchunk
	mov.f	lp_count, r2
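	; (brls.d.nt: unsigned "lower or same" compare, branch predicted
	;  not taken; the delay-slot mov.f above primes lp_count with the
	;  byte count for the .Lsmallchunk loop)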

	and.f	r4, r0, 0x03
	rsub	lp_count, r4, 4
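	; Store 4 - (dst & 3) single bytes to reach 32-bit alignment. lpnz
	; sets up a zero-overhead loop ending at .Laligndestination; it is
	; skipped entirely when the Z flag from the and.f above is set,
	; i.e. when the destination is already aligned.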
	lpnz	@.Laligndestination
	;; LOOP BEGIN
	stb.ab	r1, [r3,1]
	sub	r2, r2, 1
.Laligndestination:

;;; Destination is aligned
	and	r1, r1, 0xFF
	asl	r4, r1, 8
	or	r4, r4, r1
	asl	r5, r4, 16
	or	r5, r5, r4
	mov	r4, r5
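	; r4 and r5 now both hold the fill byte replicated across 32 bits
	; (0x000000cc -> 0x0000cccc -> 0xcccccccc); the std.ab stores below
	; write the r4:r5 pair as one 64-bit value when LL64 is available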

	sub3	lp_count, r2, 8
	cmp	r2, 64
	bmsk.hi	r2, r2, 5
	mov.ls	lp_count, 0
	add3.hi	r2, r2, 8
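	; For r2 > 64 this computes lp_count = r2 - 64 (consumed by the
	; >> 6 below) and r2 = (r2 & 0x3F) + 64, deliberately leaving the
	; final 64-byte line to the 32-byte/tail loops so the PREALLOC of
	; the *next* line in the main loop never touches memory past the
	; buffer. For r2 <= 64 the main loop is skipped (lp_count = 0).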

;;; Convert remaining length to 64-byte chunks (8 double-words unrolled)
	lsr.f	lp_count, lp_count, 6

	lpnz	@.Lset64bytes
	;; LOOP START
	PREALLOC_INSTR	r3, 64	; alloc next line w/o fetching

#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset64bytes:

	lsr.f	lp_count, r2, 5	; last remaining bytes (max 127)
	lpnz	.Lset32bytes
	;; LOOP START
#ifdef CONFIG_ARC_HAS_LL64
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
	std.ab	r4, [r3, 8]
#else
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
	st.ab	r4, [r3, 4]
#endif
.Lset32bytes:

	and.f	lp_count, r2, 0x1F	; last remaining bytes (max 31)
.Lsmallchunk:
	lpnz	.Lcopy3bytes
	;; LOOP START
	stb.ab	r1, [r3, 1]
.Lcopy3bytes:

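	;;; done: return to caller, r0 still holds the original dst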
	j	[blink]

END_CFI(memset)

ENTRY_CFI(memzero)
	; adjust bzero args to memset args
	mov	r2, r1
	b.d	memset		; tail call, so no need to tinker with blink
	mov	r1, 0		; fill byte = 0 (runs in the b.d delay slot)
END_CFI(memzero)