^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * arch/alpha/lib/ev6-memcpy.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * 21264 version by Rick Gorton <rick.gorton@alpha-processor.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Reasonably optimized memcpy() routine for the Alpha 21264
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * - bulk of the data is moved as aligned quadwords; byte loads/stores
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) * handle unaligned heads and tails, ldq_u/extql/extqh a misaligned source
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) * Much of the information about 21264 scheduling/coding comes from:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * Compiler Writer's Guide for the Alpha 21264
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * abbreviated as 'CWG' in other comments here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * ftp.digital.com/pub/Digital/info/semiconductor/literature/dsc-library.html
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * Scheduling notation:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * E - either cluster
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * U - upper subcluster; U0 - subcluster U0; U1 - subcluster U1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * L - lower subcluster; L0 - subcluster L0; L1 - subcluster L1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * Temp usage notes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * $1-$7 - scratch; the argument registers $16-$18 are modified as well
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) .set noreorder
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) .set noat
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) .align 4
/*
 * memcpy - copy $18 bytes from the buffer at $17 to the buffer at $16.
 *
 * In:   $16 = dest, $17 = src, $18 = byte count. The count is tested with
 *       signed branches, so a count <= 0 copies nothing.
 * Out:  $0 = the dest pointer as passed in.
 * Uses: $1-$7 as temporaries; $16, $17 and $18 are modified.
 *       Returns through $26; no stack frame is set up.
 *
 * Two paths:
 *  - src and dest congruent mod 8: copy bytes up to an 8-byte boundary.
 *    With 128 or more bytes left, copy quads up to a 64-byte dest boundary
 *    and run the unrolled loop (64 bytes per trip, with wh64 hints).
 *    Finish with single quads, then single bytes.
 *  - otherwise ($misaligned): copy bytes until dest is 8-byte aligned,
 *    then build each aligned dest quad from two ldq_u loads merged with
 *    extql/extqh. Finish with single bytes.
 *
 * Instructions are laid out in groups of four for 21264 issue slotting
 * (see the scheduling notation in the file header); the nops are padding.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) .globl memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) .ent memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) memcpy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) .frame $30,0,$26,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) .prologue 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) mov $16, $0 # E : $0 = dest, the return value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) ble $18, $nomoredata # U : count <= 0 (signed): nothing to copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) xor $16, $17, $1 # E : bits in which source and dest differ
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) and $1, 7, $1 # E : nonzero if they differ mod 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) bne $1, $misaligned # U : Nope - gotta do this the slow way
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) /* source and dest addresses are congruent mod 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) and $16, 7, $1 # E : Are both 0mod8?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) beq $1, $both_0mod8 # U : Yes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * source and dest have the same misalignment. Move a byte at a time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) * until a 0mod8 alignment for both is reached.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) * count is > 0 on every entry, so at least one byte is moved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) $head_align:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) ldbu $1, 0($17) # L : grab a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) subq $18, 1, $18 # E : count--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) addq $17, 1, $17 # E : src++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) stb $1, 0($16) # L : store it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) addq $16, 1, $16 # E : dest++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) and $16, 7, $1 # E : Are we at 0mod8 yet?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) ble $18, $nomoredata # U : count ran out before reaching alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) bne $1, $head_align # U : not aligned yet - next byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) $both_0mod8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) cmple $18, 127, $1 # E : $1 = 1 if fewer than 128 bytes are left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) bne $1, $no_unroll # U : too few for the unrolled loop
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) and $16, 63, $1 # E : get mod64 alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) beq $1, $do_unroll # U : no single quads to fiddle
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) $single_head_quad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) ldq $1, 0($17) # L : get 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) subq $18, 8, $18 # E : count -= 8 (was >= 128; at most 7 trips)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) addq $17, 8, $17 # E : src += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) stq $1, 0($16) # L : store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) addq $16, 8, $16 # E : dest += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) and $16, 63, $1 # E : get mod64 alignment
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) bne $1, $single_head_quad # U : dest not yet 64-byte aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) $do_unroll:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) addq $16, 64, $7 # E : Initial wh64 address: dest + 64 (+1 trip)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) cmple $18, 127, $1 # E : still 128 or more bytes after aligning?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) bne $1, $tail_quads # U : Nope
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) $unroll_body:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) wh64 ($7) # L1 : memory subsystem hint: 64 bytes at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) # ($7) are about to be over-written
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) ldq $6, 0($17) # L0 : bytes 0..7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) ldq $4, 8($17) # L : bytes 8..15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) ldq $5, 16($17) # L : bytes 16..23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) addq $7, 64, $7 # E : Update next wh64 address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) ldq $3, 24($17) # L : bytes 24..31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) addq $16, 64, $1 # E : fallback wh64 address: dest of the next trip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) addq $17, 32, $17 # E : src += 32 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) stq $6, 0($16) # L : bytes 0..7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) stq $4, 8($16) # L : bytes 8..15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) stq $5, 16($16) # L : bytes 16..23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) subq $18, 192, $2 # E : $2 < 0 if under 128 bytes remain after this trip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) stq $3, 24($16) # L : bytes 24..31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) addq $16, 32, $16 # E : dest += 32 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) ldq $6, 0($17) # L : second half: bytes 0..7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) ldq $4, 8($17) # L : bytes 8..15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) cmovlt $2, $1, $7 # E : Latency 2, extra map slot - Use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) # fallback wh64 address if < 2 more trips
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) ldq $5, 16($17) # L : bytes 16..23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) ldq $3, 24($17) # L : bytes 24..31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) addq $16, 32, $16 # E : dest += 32 (stores below use negative offsets)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) subq $18, 64, $18 # E : count -= 64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) addq $17, 32, $17 # E : src += 32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) stq $6, -32($16) # L : bytes 0..7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) stq $4, -24($16) # L : bytes 8..15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) cmple $18, 63, $1 # E : $1 = 1 if fewer than 64 bytes are left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) stq $5, -16($16) # L : bytes 16..23
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) stq $3, -8($16) # L : bytes 24..31
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) beq $1, $unroll_body # U : 64 or more left - another trip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) $tail_quads:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) $no_unroll:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) subq $18, 8, $18 # E : bias count by -8: negative = no full quad left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) blt $18, $less_than_8 # U : Nope
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) $move_a_quad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) ldq $1, 0($17) # L : fetch 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) subq $18, 8, $18 # E : count -= 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) addq $17, 8, $17 # E : src += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) stq $1, 0($16) # L : store 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) addq $16, 8, $16 # E : dest += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) bge $18, $move_a_quad # U : biased count >= 0: another full quad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) $less_than_8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) .align 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) addq $18, 8, $18 # E : undo the -8 bias: count = trailing bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) ble $18, $nomoredata # U : All-done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) /* Trailing bytes: count is > 0 on entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) $tail_bytes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) subq $18, 1, $18 # E : count--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) ldbu $1, 0($17) # L : fetch a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) addq $17, 1, $17 # E : src++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) stb $1, 0($16) # L : store a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) addq $16, 1, $16 # E : dest++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) bgt $18, $tail_bytes # U : more to be done?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) /* branching to exit takes 3 extra cycles, so replicate exit here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) ret $31, ($26), 1 # L0 : return; $0 holds dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) $misaligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) mov $0, $4 # E : $4 = dest pointer used on this path
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) and $0, 7, $1 # E : dest alignment mod8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) beq $1, $dest_0mod8 # U : dest is already 8-byte aligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) $aligndest:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) ble $18, $nomoredata # U : count ran out while aligning dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) ldbu $1, 0($17) # L : fetch a byte
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) subq $18, 1, $18 # E : count--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) addq $17, 1, $17 # E : src++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) stb $1, 0($4) # L : store it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) addq $4, 1, $4 # E : dest++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) and $4, 7, $1 # E : dest 0mod8 yet?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) bne $1, $aligndest # U : go until we are aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) /* Source has unknown alignment, but dest is known to be 0mod8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) $dest_0mod8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) subq $18, 8, $18 # E : bias count by -8: negative = no full quad left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) blt $18, $misalign_tail # U : Nope
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) ldq_u $3, 0($17) # L : seed (rotating load) of 8 bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) $mis_quad:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) ldq_u $16, 8($17) # L : Fetch next 8 ($16 is free: dest is in $4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) extql $3, $17, $3 # U : low-order result bytes, from current quad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) extqh $16, $17, $1 # U : high-order result bytes, from next quad
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) bis $3, $1, $1 # E : merged bytes to store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) subq $18, 8, $18 # E : count -= 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) addq $17, 8, $17 # E : src += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) stq $1, 0($4) # L : store 8 (aligned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) mov $16, $3 # E : "rotate" source data: next quad becomes current
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) addq $4, 8, $4 # E : dest += 8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) bge $18, $mis_quad # U : More quads to move
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) $misalign_tail:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) addq $18, 8, $18 # E : undo the -8 bias: count = trailing bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) ble $18, $nomoredata # U : nothing left
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) $misalign_byte:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) ldbu $1, 0($17) # L : fetch 1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) subq $18, 1, $18 # E : count--
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) addq $17, 1, $17 # E : src++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) stb $1, 0($4) # L : store
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) addq $4, 1, $4 # E : dest++
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) bgt $18, $misalign_byte # U : more to go?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) $nomoredata:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) ret $31, ($26), 1 # L0 : return; $0 holds dest
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) nop # E :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) .end memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) EXPORT_SYMBOL(memcpy)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) /* For backwards module compatibility: __memcpy is a global alias for memcpy. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) __memcpy = memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) .globl __memcpy