/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

/*
 * ISO C memset - set a memory block to a byte value. This function uses
 * fast string operations to get better performance than the original
 * function, and its code is simpler and shorter as well.
 *
 * rdi	destination
 * rsi	value (char)
 * rdx	count (bytes)
 *
 * rax	original destination
 */
SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
	/*
	 * Some CPUs support the enhanced REP MOVSB/STOSB (ERMS) feature.
	 * It is recommended to use it when available, so ERMS takes
	 * precedence here: the default "jmp memset_orig" below is patched
	 * at boot into a fall-through (NOPs) on REP_GOOD CPUs, and into
	 * "jmp memset_erms" on ERMS CPUs. CPUs with neither feature run
	 * the original memset function.
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

	movq %rdi,%r9		/* save original destination for the return value */
	movq %rdx,%rcx
	andl $7,%edx		/* edx = trailing byte count (count % 8) */
	shrq $3,%rcx		/* rcx = qword count (count / 8) */
	/* expand the byte value into all 8 bytes of rax */
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	imulq %rsi,%rax
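	/*
	 * For example, with value 0xab:
	 * 0xab * 0x0101010101010101 = 0xabababababababab.
	 */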
	rep stosq		/* store rcx qwords of rax at rdi */
	movl %edx,%ecx
	rep stosb		/* store the remaining 0..7 tail bytes */
	movq %r9,%rax		/* return the original destination */
	ret
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)
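
/*
 * For reference, a minimal C sketch of the fast-string path above,
 * under the assumption that each "rep stos" is modeled as a plain
 * loop. Illustrative only; the helper name and the loops are not
 * part of this file:
 *
 *	void *memset_fast_string_sketch(void *dest, int c, size_t n)
 *	{
 *		unsigned char *p = dest;
 *		unsigned long v = (unsigned char)c * 0x0101010101010101UL;
 *		size_t q;
 *
 *		for (q = n >> 3; q; q--, p += 8)	// rep stosq
 *			__builtin_memcpy(p, &v, 8);
 *		for (n &= 7; n; n--)			// rep stosb
 *			*p++ = (unsigned char)c;
 *		return dest;
 *	}
 */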

/*
 * ISO C memset - set a memory block to a byte value. This function uses
 * enhanced REP STOSB and, on CPUs with ERMS, overrides the fast-string
 * variant above. Its code is simpler and shorter still.
 *
 * rdi	destination
 * rsi	value (char)
 * rdx	count (bytes)
 *
 * rax	original destination
 */
SYM_FUNC_START_LOCAL(memset_erms)
	movq %rdi,%r9		/* save original destination for the return value */
	movb %sil,%al		/* byte value for stosb */
	movq %rdx,%rcx		/* full byte count */
	rep stosb
	movq %r9,%rax		/* return the original destination */
	ret
SYM_FUNC_END(memset_erms)
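
/*
 * With ERMS, no qword expansion or tail split is needed: the CPU's
 * microcode is expected to optimize a single "rep stosb" across sizes
 * and alignments, which is why the routine above is just one string
 * instruction between the save and restore of the destination.
 */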

SYM_FUNC_START_LOCAL(memset_orig)
	movq %rdi,%r10		/* save original destination for the return value */

	/* expand the byte value into all 8 bytes of rax */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	imulq %rcx,%rax

	/* align dst: r9d = misalignment (dest % 8) */
	movl %edi,%r9d
	andl $7,%r9d
	jnz .Lbad_alignment
.Lafter_bad_alignment:

	movq %rdx,%rcx
	shrq $6,%rcx		/* rcx = number of 64-byte chunks */
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	decq %rcx		/* movq/leaq below leave flags intact, so jnz tests this */
	movq %rax,(%rdi)
	movq %rax,8(%rdi)
	movq %rax,16(%rdi)
	movq %rax,24(%rdi)
	movq %rax,32(%rdi)
	movq %rax,40(%rdi)
	movq %rax,48(%rdi)
	movq %rax,56(%rdi)
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

	/*
	 * Handle the tail in loops. The loops should be faster than
	 * hard-to-predict jump tables.
	 */
	.p2align 4
.Lhandle_tail:
	movl %edx,%ecx
	andl $63&(~7),%ecx	/* mask is 56: whole qwords left within the last chunk */
	jz .Lhandle_7
	shrl $3,%ecx		/* ecx = remaining qword count */
	.p2align 4
.Lloop_8:
	decl %ecx
	movq %rax,(%rdi)	/* store one qword per iteration */
	leaq 8(%rdi),%rdi
	jnz .Lloop_8

.Lhandle_7:
	andl $7,%edx		/* edx = trailing byte count */
	jz .Lende
	.p2align 4
.Lloop_1:
	decl %edx
	movb %al,(%rdi)		/* store one byte per iteration */
	leaq 1(%rdi),%rdi
	jnz .Lloop_1

.Lende:
	movq %r10,%rax		/* return the original destination */
	ret

.Lbad_alignment:
	cmpq $7,%rdx
	jbe .Lhandle_7		/* too short to be worth aligning */
	movq %rax,(%rdi)	/* unaligned store of 8 bytes */
	movq $8,%r8
	subq %r9,%r8		/* r8 = 8 - (dest % 8) = bytes to the next boundary */
	addq %r8,%rdi
	subq %r8,%rdx
	jmp .Lafter_bad_alignment
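	/*
	 * For example, if dest % 8 == 3, the unaligned store fills
	 * bytes 0..7; rdi then advances by 8 - 3 = 5 to the next 8-byte
	 * boundary, so bytes 5..7 are simply written twice.
	 */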
.Lfinal:
SYM_FUNC_END(memset_orig)
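
/*
 * For reference, a minimal C sketch of the structure of memset_orig
 * above. Illustrative only; the helper name is not part of this file:
 *
 *	void *memset_orig_sketch(void *dest, int c, size_t n)
 *	{
 *		unsigned char *p = dest;
 *		unsigned long v = (unsigned char)c * 0x0101010101010101UL;
 *		size_t k;
 *
 *		if (((unsigned long)p & 7) && n > 7) {
 *			__builtin_memcpy(p, &v, 8);	// unaligned head
 *			k = 8 - ((unsigned long)p & 7);
 *			p += k;
 *			n -= k;
 *		}
 *		for (k = n >> 6; k; k--, p += 64)	// .Lloop_64
 *			for (int i = 0; i < 64; i += 8)
 *				__builtin_memcpy(p + i, &v, 8);
 *		for (k = (n & 56) >> 3; k; k--, p += 8)	// .Lloop_8
 *			__builtin_memcpy(p, &v, 8);
 *		for (n &= 7; n; n--)			// .Lloop_1
 *			*p++ = (unsigned char)c;
 *		return dest;
 *	}
 */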