Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

3 Commits   0 Branches   0 Tags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   3)  * arch/alpha/lib/ev6-memcpy.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   4)  * 21264 version by Rick Gorton <rick.gorton@alpha-processor.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   5)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   6)  * Reasonably optimized memcpy() routine for the Alpha 21264
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   7)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   8)  *	- bulk data is moved as aligned quadwords (ldq/ldq_u loads, stq stores)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300   9)  *	- head/tail bytes are moved with ldbu/stb; no cmpbge ("bcmpge") is used here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  10)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  11)  * Much of the information about 21264 scheduling/coding comes from:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  12)  *	Compiler Writer's Guide for the Alpha 21264
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  13)  *	abbreviated as 'CWG' in other comments here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  14)  *	ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  15)  * Scheduling notation:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  16)  *	E	- either cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  17)  *	U	- upper subcluster; U0 - subcluster U0; U1 - subcluster U1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  18)  *	L	- lower subcluster; L0 - subcluster L0; L1 - subcluster L1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  19)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  20)  * Temp usage notes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  21)  *	$1-$7		- scratch ($4 is the dest cursor on the misaligned path)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  22)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  23) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  24) 	.set noreorder		# ask the assembler to keep the hand-scheduled instruction order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  25) 	.set noat		# assembler may not use $at as an implicit temporary
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  26) 	/* memcpy: copy $18 bytes from ($17) to ($16), low to high addresses; returns the original dest in $0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  27) 	.align	4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  28) 	.globl memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  29) 	.ent memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  30) memcpy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  31) 	.frame $30,0,$26,0	# zero-size frame; return address is in $26
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  32) 	.prologue 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  33) 	/* $17 = src cursor and $18 = bytes left throughout; dest cursor is $16 (or $4 on the misaligned path) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  34) 	mov	$16, $0			# E : copy dest to return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  35) 	ble	$18, $nomoredata	# U : count <= 0 (signed compare)? nothing to copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  36) 	xor	$16, $17, $1		# E : are source and dest alignments the same?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  37) 	and	$1, 7, $1		# E : are they the same mod 8?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  38) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  39) 	bne	$1, $misaligned		# U : Nope - gotta do this the slow way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  40) 	/* source and dest are same mod 8 address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  41) 	and	$16, 7, $1		# E : Are both 0mod8?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  42) 	beq	$1, $both_0mod8		# U : Yes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  43) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  44) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  45) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  46) 	 * source and dest are same misalignment.  move a byte at a time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  47) 	 * until a 0mod8 alignment for both is reached.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  48) 	 * At least one byte more to move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  49) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  50) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  51) $head_align:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  52) 	ldbu	$1, 0($17)		# L : grab a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  53) 	subq	$18, 1, $18		# E : count--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  54) 	addq	$17, 1, $17		# E : src++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  55) 	stb	$1, 0($16)		# L :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  56) 	addq	$16, 1, $16		# E : dest++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  57) 	and	$16, 7, $1		# E : Are we at 0mod8 yet?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  58) 	ble	$18, $nomoredata	# U : done with the copy?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  59) 	bne	$1, $head_align		# U : not yet 0mod8, copy another byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  60) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  61) $both_0mod8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  62) 	cmple	$18, 127, $1		# E : Can we unroll the loop?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  63) 	bne	$1, $no_unroll		# U : count <= 127, skip the unrolled loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  64) 	and	$16, 63, $1		# E : get mod64 alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  65) 	beq	$1, $do_unroll		# U : no single quads to fiddle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  66) 	/* Copy single quads until dest is 0mod64; count >= 128 on entry and dest is 0mod8, so at most 7 quads */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  67) $single_head_quad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  68) 	ldq	$1, 0($17)		# L : get 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  69) 	subq	$18, 8, $18		# E : count -= 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  70) 	addq	$17, 8, $17		# E : src += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  71) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  72) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  73) 	stq	$1, 0($16)		# L : store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  74) 	addq	$16, 8, $16		# E : dest += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  75) 	and	$16, 63, $1		# E : get mod64 alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  76) 	bne	$1, $single_head_quad	# U : still not fully aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  77) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  78) $do_unroll:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  79) 	addq	$16, 64, $7		# E : Initial (+1 trip) wh64 address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  80) 	cmple	$18, 127, $1		# E : Can we go through the unrolled loop?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  81) 	bne	$1, $tail_quads		# U : Nope
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  82) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  83) 	/* Unrolled loop: 64 bytes per trip as two 32-byte halves; first entered with count >= 128, repeats while count > 63 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  84) $unroll_body:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  85) 	wh64	($7)			# L1 : memory subsystem hint: 64 bytes at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  86) 					# ($7) are about to be over-written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  87) 	ldq	$6, 0($17)		# L0 : bytes 0..7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  88) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  89) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  90) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  91) 	ldq	$4, 8($17)		# L : bytes 8..15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  92) 	ldq	$5, 16($17)		# L : bytes 16..23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  93) 	addq	$7, 64, $7		# E : Update next wh64 address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  94) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  95) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  96) 	ldq	$3, 24($17)		# L : bytes 24..31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  97) 	addq	$16, 64, $1		# E : fallback value for wh64 (dest of the next trip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  98) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  99) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) 	addq	$17, 32, $17		# E : src += 32 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) 	stq	$6, 0($16)		# L : bytes 0..7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) 	stq	$4, 8($16)		# L : bytes 8..15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) 	stq	$5, 16($16)		# L : bytes 16..23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) 	subq	$18, 192, $2		# E : At least two more trips to go? ($18 not yet decremented for this trip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) 	stq	$3, 24($16)		# L : bytes 24..31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) 	addq	$16, 32, $16		# E : dest += 32 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) 	ldq	$6, 0($17)		# L : bytes 0..7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) 	ldq	$4, 8($17)		# L : bytes 8..15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) 	cmovlt	$2, $1, $7		# E : Latency 2, extra map slot - Use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) 					# fallback wh64 address if < 2 more trips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) 	ldq	$5, 16($17)		# L : bytes 16..23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) 	ldq	$3, 24($17)		# L : bytes 24..31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) 	addq	$16, 32, $16		# E : dest += 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) 	subq	$18, 64, $18		# E : count -= 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) 	addq	$17, 32, $17		# E : src += 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) 	stq	$6, -32($16)		# L : bytes 0..7 (negative offsets: dest was already advanced)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) 	stq	$4, -24($16)		# L : bytes 8..15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) 	cmple	$18, 63, $1		# E : At least one more trip?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) 	stq	$5, -16($16)		# L : bytes 16..23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 	stq	$3, -8($16)		# L : bytes 24..31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) 	beq	$1, $unroll_body	# loop again while count > 63
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 	/* At most 127 bytes left here: copy whole quads one at a time; $18 is biased by -8 inside this loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) $tail_quads:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) $no_unroll:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) 	.align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) 	subq	$18, 8, $18		# E : At least a quad left?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) 	blt	$18, $less_than_8	# U : Nope
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) $move_a_quad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) 	ldq	$1, 0($17)		# L : fetch 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) 	subq	$18, 8, $18		# E : count -= 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) 	addq	$17, 8, $17		# E : src += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) 	stq	$1, 0($16)		# L : store 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) 	addq	$16, 8, $16		# E : dest += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) 	bge	$18, $move_a_quad	# U : another full quad remains
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) $less_than_8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 	.align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) 	addq	$18, 8, $18		# E : add back for trailing bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) 	ble	$18, $nomoredata	# U : All-done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) 	/* Trailing bytes: 1..7 bytes left, copied one at a time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) $tail_bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) 	subq	$18, 1, $18		# E : count--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) 	ldbu	$1, 0($17)		# L : fetch a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) 	addq	$17, 1, $17		# E : src++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 	stb	$1, 0($16)		# L : store a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) 	addq	$16, 1, $16		# E : dest++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) 	bgt	$18, $tail_bytes	# U : more to be done?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) 	/* branching to exit takes 3 extra cycles, so replicate exit here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) 	ret	$31, ($26), 1		# L0 : return to caller; $0 still holds the original dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) 	/* src and dest differ mod 8: byte-copy until dest is 0mod8, then merge pairs of source quads into aligned stores */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) $misaligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) 	mov	$0, $4			# E : dest temp ($0 == $16 here; $16 becomes a data temp below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) 	and	$0, 7, $1		# E : dest alignment mod8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) 	beq	$1, $dest_0mod8		# U : life doesnt totally suck
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) 	nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) $aligndest:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) 	ble	$18, $nomoredata	# U : ran out of bytes before dest got aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) 	ldbu	$1, 0($17)		# L : fetch a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) 	subq	$18, 1, $18		# E : count--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 	addq	$17, 1, $17		# E : src++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) 	stb	$1, 0($4)		# L : store it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) 	addq	$4, 1, $4		# E : dest++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) 	and	$4, 7, $1		# E : dest 0mod8 yet?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) 	bne	$1, $aligndest		# U : go until we are aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) 	/* Source has unknown alignment, but dest is known to be 0mod8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) $dest_0mod8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) 	subq	$18, 8, $18		# E : At least a quad left?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) 	blt	$18, $misalign_tail	# U : Nope
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) 	ldq_u	$3, 0($17)		# L : seed (rotating load) of 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) 	/* At loop top: $3 = aligned quad containing the byte at ($17), $4 = 0mod8 dest, $18 = bytes left - 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) $mis_quad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) 	ldq_u	$16, 8($17)		# L : Fetch next 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 	extql	$3, $17, $3		# U : masking - current quad supplies the low bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) 	extqh	$16, $17, $1		# U : masking - next quad supplies the high bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) 	bis	$3, $1, $1		# E : merged bytes to store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) 	subq	$18, 8, $18		# E : count -= 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) 	addq	$17, 8, $17		# E : src += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) 	stq	$1, 0($4)		# L : store 8 (aligned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) 	mov	$16, $3			# E : "rotate" source data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) 	addq	$4, 8, $4		# E : dest += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) 	bge	$18, $mis_quad		# U : More quads to move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) 	nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) 	nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) $misalign_tail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) 	addq	$18, 8, $18		# E : account for tail stuff (undo the -8 bias)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) 	ble	$18, $nomoredata	# U : nothing left to copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) 	nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) 	nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) $misalign_byte:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) 	ldbu	$1, 0($17)		# L : fetch 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) 	subq	$18, 1, $18		# E : count--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) 	addq	$17, 1, $17		# E : src++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) 	stb	$1, 0($4)		# L : store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) 	addq	$4, 1, $4		# E : dest++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) 	bgt	$18, $misalign_byte	# U : more to go?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) 	nop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) 	/* Common exit: reached by branch from every early-out test and by fall-through from $misalign_byte */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) $nomoredata:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) 	ret	$31, ($26), 1		# L0 : return to caller; $0 holds the original dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) 	nop				# E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) 	.end memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) 	EXPORT_SYMBOL(memcpy)	/* presumably the macro from <asm/export.h>, included at the top of this file */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) /* For backwards module compatibility: __memcpy is a global alias for memcpy.  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) __memcpy = memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) .globl __memcpy