Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for the Orange Pi 5 / 5B / 5 Plus boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /* SPDX-License-Identifier: GPL-2.0-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /* Copyright 2002 Andi Kleen */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5) #include <asm/errno.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6) #include <asm/cpufeatures.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7) #include <asm/alternative-asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10) .pushsection .noinstr.text, "ax"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  * We build a jump to memcpy_orig by default which gets NOPped out on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  * the majority of x86 CPUs which set REP_GOOD. In addition, CPUs which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  * have the enhanced REP MOVSB/STOSB feature (ERMS), change those NOPs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16)  * to a jmp to memcpy_erms which does the REP; MOVSB mem copy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20)  * memcpy - Copy a memory block.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22)  * Input:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23)  *  rdi destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24)  *  rsi source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25)  *  rdx count
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27)  * Output:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28)  * rax original destination
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29)  */
SYM_FUNC_START_ALIAS(__memcpy)
SYM_FUNC_START_WEAK(memcpy)
	/*
	 * Boot-time patch site (see the "NOPped out" comment above):
	 *  - default:            jmp memcpy_orig (unrolled copy below)
	 *  - X86_FEATURE_REP_GOOD: patched to NOPs, fall through to
	 *                          the rep-movsq path here
	 *  - X86_FEATURE_ERMS:   patched to jmp memcpy_erms
	 */
	ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memcpy_erms", X86_FEATURE_ERMS

	movq %rdi, %rax		/* return value: original destination */
	movq %rdx, %rcx
	shrq $3, %rcx		/* rcx = count / 8 = number of qwords */
	andl $7, %edx		/* edx = count % 8 = tail byte count */
	rep movsq		/* bulk copy, one qword at a time */
	movl %edx, %ecx
	rep movsb		/* copy the remaining 0..7 tail bytes */
	ret
SYM_FUNC_END(memcpy)
SYM_FUNC_END_ALIAS(__memcpy)
EXPORT_SYMBOL(memcpy)
EXPORT_SYMBOL(__memcpy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 
/*
 * memcpy_erms() - enhanced fast string memcpy. This is faster and
 * simpler than memcpy. Use memcpy_erms when possible.
 *
 * Only reached via the ALTERNATIVE_2 patch site in memcpy() on CPUs
 * advertising X86_FEATURE_ERMS, so it uses the same register contract:
 * rdi = dest, rsi = src, rdx = count, returns dest in rax.
 */
SYM_FUNC_START_LOCAL(memcpy_erms)
	movq %rdi, %rax		/* return value: original destination */
	movq %rdx, %rcx		/* rep movsb takes the byte count in rcx */
	rep movsb		/* single byte-granular copy; fast with ERMSB */
	ret
SYM_FUNC_END(memcpy_erms)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 
/*
 * memcpy_orig() - unrolled qword copy for CPUs without REP_GOOD/ERMS.
 * Same contract as memcpy: rdi = dest, rsi = src, rdx = count,
 * returns dest in rax. Clobbers rcx, rdx, rsi, rdi, r8-r11, flags.
 */
SYM_FUNC_START_LOCAL(memcpy_orig)
	movq %rdi, %rax		/* return value: original destination */

	cmpq $0x20, %rdx
	jb .Lhandle_tail	/* < 32 bytes: overlapping-load tail code */

	/*
	 * We check whether memory false dependence could occur,
	 * then jump to corresponding copy mode.
	 * (Only the low 8 bits of each pointer are compared - a cheap
	 * heuristic, not an exact overlap test.)
	 */
	cmp  %dil, %sil
	jl .Lcopy_backward
	/*
	 * Bias the count by -0x20 so that the in-loop subq's carry flag
	 * (consumed by the jae below) tells us whether another full
	 * 32-byte block remains.
	 */
	subq $0x20, %rdx
.Lcopy_forward_loop:
	subq $0x20,	%rdx

	/*
	 * Move in blocks of 4x8 bytes:
	 */
	movq 0*8(%rsi),	%r8
	movq 1*8(%rsi),	%r9
	movq 2*8(%rsi),	%r10
	movq 3*8(%rsi),	%r11
	leaq 4*8(%rsi),	%rsi	/* lea: advances pointer, preserves flags */

	movq %r8,	0*8(%rdi)
	movq %r9,	1*8(%rdi)
	movq %r10,	2*8(%rdi)
	movq %r11,	3*8(%rdi)
	leaq 4*8(%rdi),	%rdi
	jae  .Lcopy_forward_loop /* flags still from subq: no borrow => more */
	addl $0x20,	%edx	/* undo the bias: edx = remaining 0..31 bytes */
	jmp  .Lhandle_tail

.Lcopy_backward:
	/*
	 * Calculate copy position to tail.
	 */
	addq %rdx,	%rsi
	addq %rdx,	%rdi
	subq $0x20,	%rdx	/* same -0x20 bias trick as the forward loop */
	/*
	 * At most 3 ALU operations in one cycle,
	 * so append NOPS in the same 16 bytes trunk.
	 */
	.p2align 4
.Lcopy_backward_loop:
	subq $0x20,	%rdx
	movq -1*8(%rsi),	%r8
	movq -2*8(%rsi),	%r9
	movq -3*8(%rsi),	%r10
	movq -4*8(%rsi),	%r11
	leaq -4*8(%rsi),	%rsi	/* lea preserves the subq's flags */
	movq %r8,		-1*8(%rdi)
	movq %r9,		-2*8(%rdi)
	movq %r10,		-3*8(%rdi)
	movq %r11,		-4*8(%rdi)
	leaq -4*8(%rdi),	%rdi
	jae  .Lcopy_backward_loop

	/*
	 * Calculate copy position to head.
	 */
	addl $0x20,	%edx	/* undo bias; rsi/rdi point past the tail copy */
	subq %rdx,	%rsi
	subq %rdx,	%rdi
.Lhandle_tail:
	/* edx = remaining count, 0..31 bytes from here on */
	cmpl $16,	%edx
	jb   .Lless_16bytes

	/*
	 * Move data from 16 bytes to 31 bytes.
	 * Two possibly-overlapping qword pairs: one anchored at the
	 * start, one at the end (offset by rdx).
	 */
	movq 0*8(%rsi), %r8
	movq 1*8(%rsi),	%r9
	movq -2*8(%rsi, %rdx),	%r10
	movq -1*8(%rsi, %rdx),	%r11
	movq %r8,	0*8(%rdi)
	movq %r9,	1*8(%rdi)
	movq %r10,	-2*8(%rdi, %rdx)
	movq %r11,	-1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_16bytes:
	cmpl $8,	%edx
	jb   .Lless_8bytes
	/*
	 * Move data from 8 bytes to 15 bytes.
	 * One qword from the start, one (overlapping) from the end.
	 */
	movq 0*8(%rsi),	%r8
	movq -1*8(%rsi, %rdx),	%r9
	movq %r8,	0*8(%rdi)
	movq %r9,	-1*8(%rdi, %rdx)
	retq
	.p2align 4
.Lless_8bytes:
	cmpl $4,	%edx
	jb   .Lless_3bytes

	/*
	 * Move data from 4 bytes to 7 bytes.
	 * Same overlapping trick with dwords.
	 */
	movl (%rsi), %ecx
	movl -4(%rsi, %rdx), %r8d
	movl %ecx, (%rdi)
	movl %r8d, -4(%rdi, %rdx)
	retq
	.p2align 4
.Lless_3bytes:
	subl $1, %edx		/* edx = count - 1; borrow (CF) iff count == 0 */
	jb .Lend		/* count was 0: nothing to copy */
	/*
	 * Move data from 1 bytes to 3 bytes.
	 */
	movzbl (%rsi), %ecx	/* first byte; movzbl preserves ZF from subl */
	jz .Lstore_1byte	/* ZF from subl: count was exactly 1 */
	/* 2 or 3 bytes: copy byte 1 and the last byte (rdx = count-1) */
	movzbq 1(%rsi), %r8
	movzbq (%rsi, %rdx), %r9
	movb %r8b, 1(%rdi)
	movb %r9b, (%rdi, %rdx)
.Lstore_1byte:
	movb %cl, (%rdi)	/* store the first byte last */

.Lend:
	retq
SYM_FUNC_END(memcpy_orig)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) .popsection