^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) * Copyright (C) 2008-2009 Michal Simek <monstr@monstr.eu>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * Copyright (C) 2008-2009 PetaLogix
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) 2008 Jim Law - Iris LP All rights reserved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * This file is subject to the terms and conditions of the GNU General
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) * Public License. See the file COPYING in the main directory of this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) * archive for more details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) * Written by Jim Law <jlaw@irispower.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) * intended to replace:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * memcpy in memcpy.c and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * memmove in memmove.c
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * ... in arch/microblaze/lib
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * assly_fastcopy.S
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) * Attempt at quicker memcpy and memmove for MicroBlaze
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * Input : Operand1 in Reg r5 - destination address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * Operand2 in Reg r6 - source address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * Operand3 in Reg r7 - number of bytes to transfer
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * Output: Result in Reg r3 - starting destination address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * Explanation:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * Perform (possibly unaligned) copy of a block of memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * between mem locations with size of xfer spec'd in bytes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) #include <linux/linkage.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) .globl memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) .type memcpy, @function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) .ent memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) /* memcpy(d = r5, s = r6, c = r7): ascending copy of c bytes; r3 returns the original d */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) memcpy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) fast_memcpy_ascending: /* memmove also branches to this label */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) /* move d to return register as value of function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) addi r3, r5, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) /* fewer than 4 bytes: go straight to the trailing byte loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) addi r4, r0, 4 /* n = 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) cmpu r4, r4, r7 /* n = c - n (unsigned) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) blti r4, a_xfer_end /* if n < 0, less than one word to transfer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) /* transfer first 0~3 bytes to get aligned dest address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) andi r4, r5, 3 /* n = d & 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) /* if zero, destination already aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) beqi r4, a_dalign_done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) /* n = 4 - n (yields 3, 2, 1 transfers for 1, 2, 3 addr offset) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) rsubi r4, r4, 4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) rsub r7, r4, r7 /* c = c - n adjust c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) /* byte-copy n bytes so that d reaches a word boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) a_xfer_first_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) /* if no bytes left to transfer, transfer the bulk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) beqi r4, a_dalign_done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) lbui r11, r6, 0 /* h = *s */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) sbi r11, r5, 0 /* *d = h */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) addi r6, r6, 1 /* s++ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) addi r5, r5, 1 /* d++ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) brid a_xfer_first_loop /* loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) addi r4, r4, -1 /* n-- (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) a_dalign_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) addi r4, r0, 32 /* n = 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) cmpu r4, r4, r7 /* n = c - n (unsigned) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) /* if n < 0, less than one block to transfer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) blti r4, a_block_done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) a_block_xfer:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) andi r4, r7, 0xffffffe0 /* n = c & ~31 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) rsub r7, r4, r7 /* c = c - n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) /* the source alignment selects the aligned or the shifted block copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) andi r9, r6, 3 /* t1 = s & 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) /* if temp != 0, unaligned transfers needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) bnei r9, a_block_unaligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) a_block_aligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) lwi r9, r6, 0 /* t1 = *(s + 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) lwi r10, r6, 4 /* t2 = *(s + 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) lwi r11, r6, 8 /* t3 = *(s + 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) lwi r12, r6, 12 /* t4 = *(s + 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) swi r9, r5, 0 /* *(d + 0) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) swi r10, r5, 4 /* *(d + 4) = t2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) swi r11, r5, 8 /* *(d + 8) = t3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) swi r12, r5, 12 /* *(d + 12) = t4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) lwi r9, r6, 16 /* t1 = *(s + 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) lwi r10, r6, 20 /* t2 = *(s + 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) lwi r11, r6, 24 /* t3 = *(s + 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) lwi r12, r6, 28 /* t4 = *(s + 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) swi r9, r5, 16 /* *(d + 16) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) swi r10, r5, 20 /* *(d + 20) = t2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) swi r11, r5, 24 /* *(d + 24) = t3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) swi r12, r5, 28 /* *(d + 28) = t4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) addi r6, r6, 32 /* s = s + 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) addi r4, r4, -32 /* n = n - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) bneid r4, a_block_aligned /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) bri a_block_done /* skip the unaligned block variants */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) a_block_unaligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) andi r8, r6, 0xfffffffc /* as = s & ~3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) add r6, r6, r4 /* s = s + n (loops below read through as, not s) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) lwi r11, r8, 0 /* h = *(as + 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) /* dispatch on the source byte offset t1 (1, 2 or 3) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) addi r9, r9, -1 /* t1-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) beqi r9, a_block_u1 /* t1 was 1 => 1 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) addi r9, r9, -1 /* t1-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) beqi r9, a_block_u2 /* t1 was 2 => 2 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) /* neither 1 nor 2 => 3 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) a_block_u3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) bslli r11, r11, 24 /* h = h << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) a_bu3_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) lwi r12, r8, 4 /* v = *(as + 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) swi r9, r5, 0 /* *(d + 0) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) lwi r12, r8, 8 /* v = *(as + 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) swi r9, r5, 4 /* *(d + 4) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) lwi r12, r8, 12 /* v = *(as + 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) swi r9, r5, 8 /* *(d + 8) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) lwi r12, r8, 16 /* v = *(as + 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) swi r9, r5, 12 /* *(d + 12) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) lwi r12, r8, 20 /* v = *(as + 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) swi r9, r5, 16 /* *(d + 16) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) lwi r12, r8, 24 /* v = *(as + 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) swi r9, r5, 20 /* *(d + 20) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) lwi r12, r8, 28 /* v = *(as + 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) swi r9, r5, 24 /* *(d + 24) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) lwi r12, r8, 32 /* v = *(as + 32) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) swi r9, r5, 28 /* *(d + 28) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) addi r8, r8, 32 /* as = as + 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) addi r4, r4, -32 /* n = n - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) bneid r4, a_bu3_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) bri a_block_done /* block copy finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) a_block_u1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) bslli r11, r11, 8 /* h = h << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) a_bu1_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) lwi r12, r8, 4 /* v = *(as + 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) swi r9, r5, 0 /* *(d + 0) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) lwi r12, r8, 8 /* v = *(as + 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) swi r9, r5, 4 /* *(d + 4) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) lwi r12, r8, 12 /* v = *(as + 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) swi r9, r5, 8 /* *(d + 8) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) lwi r12, r8, 16 /* v = *(as + 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) swi r9, r5, 12 /* *(d + 12) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) lwi r12, r8, 20 /* v = *(as + 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) swi r9, r5, 16 /* *(d + 16) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) lwi r12, r8, 24 /* v = *(as + 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) swi r9, r5, 20 /* *(d + 20) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) lwi r12, r8, 28 /* v = *(as + 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) swi r9, r5, 24 /* *(d + 24) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) lwi r12, r8, 32 /* v = *(as + 32) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) swi r9, r5, 28 /* *(d + 28) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) addi r8, r8, 32 /* as = as + 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) addi r4, r4, -32 /* n = n - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) bneid r4, a_bu1_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) bri a_block_done /* block copy finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) a_block_u2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) bslli r11, r11, 16 /* h = h << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) a_bu2_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) lwi r12, r8, 4 /* v = *(as + 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) swi r9, r5, 0 /* *(d + 0) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) lwi r12, r8, 8 /* v = *(as + 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) swi r9, r5, 4 /* *(d + 4) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) lwi r12, r8, 12 /* v = *(as + 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) swi r9, r5, 8 /* *(d + 8) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) lwi r12, r8, 16 /* v = *(as + 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) swi r9, r5, 12 /* *(d + 12) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) lwi r12, r8, 20 /* v = *(as + 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) swi r9, r5, 16 /* *(d + 16) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) lwi r12, r8, 24 /* v = *(as + 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) swi r9, r5, 20 /* *(d + 20) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) lwi r12, r8, 28 /* v = *(as + 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) swi r9, r5, 24 /* *(d + 24) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) lwi r12, r8, 32 /* v = *(as + 32) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) swi r9, r5, 28 /* *(d + 28) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) addi r8, r8, 32 /* as = as + 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) addi r4, r4, -32 /* n = n - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) bneid r4, a_bu2_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) addi r5, r5, 32 /* d = d + 32 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) /* the 2 byte offset loop falls through into a_block_done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) a_block_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) addi r4, r0, 4 /* n = 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) cmpu r4, r4, r7 /* n = c - n (unsigned) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) blti r4, a_xfer_end /* if n < 0, less than one word to transfer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) /* copy the remaining whole words, indexed by offset (r10) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) a_word_xfer:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) andi r4, r7, 0xfffffffc /* n = c & ~3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) addi r10, r0, 0 /* offset = 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) andi r9, r6, 3 /* t1 = s & 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) /* if temp != 0, unaligned transfers needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) bnei r9, a_word_unaligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) a_word_aligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) lw r9, r6, r10 /* t1 = *(s+offset) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) sw r9, r5, r10 /* *(d+offset) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) addi r4, r4,-4 /* n = n - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) bneid r4, a_word_aligned /* loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) addi r10, r10, 4 /* offset = offset + 4 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) bri a_word_done /* skip the unaligned word variants */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) a_word_unaligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) andi r8, r6, 0xfffffffc /* as = s & ~3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) lwi r11, r8, 0 /* h = *(as + 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) addi r8, r8, 4 /* as = as + 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) /* dispatch on the source byte offset t1 (1, 2 or 3) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) addi r9, r9, -1 /* t1-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) beqi r9, a_word_u1 /* t1 was 1 => 1 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) addi r9, r9, -1 /* t1-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) beqi r9, a_word_u2 /* t1 was 2 => 2 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) /* neither 1 nor 2 => 3 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) a_word_u3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) bslli r11, r11, 24 /* h = h << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) a_wu3_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) lw r12, r8, r10 /* v = *(as + offset) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) bsrli r9, r12, 8 /* t1 = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) sw r9, r5, r10 /* *(d + offset) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297) bslli r11, r12, 24 /* h = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) addi r4, r4,-4 /* n = n - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) bneid r4, a_wu3_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) addi r10, r10, 4 /* offset = offset + 4 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) bri a_word_done /* word copy finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) a_word_u1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) bslli r11, r11, 8 /* h = h << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) a_wu1_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) lw r12, r8, r10 /* v = *(as + offset) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) bsrli r9, r12, 24 /* t1 = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) sw r9, r5, r10 /* *(d + offset) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) bslli r11, r12, 8 /* h = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) addi r4, r4,-4 /* n = n - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) bneid r4, a_wu1_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) addi r10, r10, 4 /* offset = offset + 4 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) bri a_word_done /* word copy finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) a_word_u2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) bslli r11, r11, 16 /* h = h << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) a_wu2_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) lw r12, r8, r10 /* v = *(as + offset) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) bsrli r9, r12, 16 /* t1 = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) sw r9, r5, r10 /* *(d + offset) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) bslli r11, r12, 16 /* h = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) addi r4, r4,-4 /* n = n - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) bneid r4, a_wu2_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) addi r10, r10, 4 /* offset = offset + 4 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) /* the 2 byte offset loop falls through into a_word_done */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) a_word_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) add r5, r5, r10 /* d = d + offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) add r6, r6, r10 /* s = s + offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) rsub r7, r10, r7 /* c = c - offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) /* copy the remaining c bytes (fewer than 4) one at a time */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) a_xfer_end:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336) a_xfer_end_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) beqi r7, a_done /* while (c) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) lbui r9, r6, 0 /* t1 = *s */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) addi r6, r6, 1 /* s++ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340) sbi r9, r5, 0 /* *d = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) addi r7, r7, -1 /* c-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) brid a_xfer_end_loop /* loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) addi r5, r5, 1 /* d++ (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345) a_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) rtsd r15, 8 /* return to caller; r3 holds the original d */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347) nop /* delay slot of rtsd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) .size memcpy, . - memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) .end memcpy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) /*----------------------------------------------------------------------------*/
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) .globl memmove
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) .type memmove, @function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) .ent memmove
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) memmove:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) cmpu r4, r5, r6 /* n = s - d */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) bgei r4,fast_memcpy_ascending
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) fast_memcpy_descending:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) /* move d to return register as value of function */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) addi r3, r5, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) add r5, r5, r7 /* d = d + c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) add r6, r6, r7 /* s = s + c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) addi r4, r0, 4 /* n = 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) cmpu r4, r4, r7 /* n = c - n (unsigned) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) blti r4,d_xfer_end /* if n < 0, less than one word to transfer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) /* transfer first 0~3 bytes to get aligned dest address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) andi r4, r5, 3 /* n = d & 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373) /* if zero, destination already aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) beqi r4,d_dalign_done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) rsub r7, r4, r7 /* c = c - n adjust c */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377) d_xfer_first_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) /* if no bytes left to transfer, transfer the bulk */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) beqi r4,d_dalign_done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) addi r6, r6, -1 /* s-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) addi r5, r5, -1 /* d-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382) lbui r11, r6, 0 /* h = *s */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) sbi r11, r5, 0 /* *d = h */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) brid d_xfer_first_loop /* loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) addi r4, r4, -1 /* n-- (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) d_dalign_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) addi r4, r0, 32 /* n = 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) cmpu r4, r4, r7 /* n = c - n (unsigned) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) /* if n < 0, less than one block to transfer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) blti r4, d_block_done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) d_block_xfer:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394) andi r4, r7, 0xffffffe0 /* n = c & ~31 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) rsub r7, r4, r7 /* c = c - n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) andi r9, r6, 3 /* t1 = s & 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) /* if temp != 0, unaligned transfers needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) bnei r9, d_block_unaligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) d_block_aligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402) addi r6, r6, -32 /* s = s - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) addi r5, r5, -32 /* d = d - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) lwi r9, r6, 28 /* t1 = *(s + 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) lwi r10, r6, 24 /* t2 = *(s + 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) lwi r11, r6, 20 /* t3 = *(s + 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) lwi r12, r6, 16 /* t4 = *(s + 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408) swi r9, r5, 28 /* *(d + 28) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) swi r10, r5, 24 /* *(d + 24) = t2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) swi r11, r5, 20 /* *(d + 20) = t3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) swi r12, r5, 16 /* *(d + 16) = t4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) lwi r9, r6, 12 /* t1 = *(s + 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) lwi r10, r6, 8 /* t2 = *(s + 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) lwi r11, r6, 4 /* t3 = *(s + 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) lwi r12, r6, 0 /* t4 = *(s + 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) swi r9, r5, 12 /* *(d + 12) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417) swi r10, r5, 8 /* *(d + 8) = t2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) swi r11, r5, 4 /* *(d + 4) = t3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) addi r4, r4, -32 /* n = n - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) bneid r4, d_block_aligned /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) swi r12, r5, 0 /* *(d + 0) = t4 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) bri d_block_done
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
/*
 * d_block_unaligned: set-up and dispatch for the descending block copy
 * when t1 (r9) is non-zero.  The existing comments describe t1 as the
 * source's byte offset within its word; it is computed before this point.
 * as (r8) is the source rounded down to a word boundary and is the pointer
 * the loops below walk; s (r6) is moved down by the whole block length n
 * (r4) in one step here.  h (r11) is primed with the word at as.
 * Falls through to d_block_u3 when t1 is neither 1 nor 2.
 * NOTE(review): the shift pairs used by the loops below merge the bytes
 * correctly for big-endian word layout - confirm the target endianness.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) d_block_unaligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) andi r8, r6, 0xfffffffc /* as = s & ~3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) rsub r6, r4, r6 /* s = s - n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) lwi r11, r8, 0 /* h = *(as + 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429) addi r9, r9, -1 /* t1 = t1 - 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) beqi r9,d_block_u1 /* t1 was 1 => 1 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) addi r9, r9, -1 /* t1 = t1 - 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) beqi r9,d_block_u2 /* t1 was 2 => 2 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
/*
 * d_block_u3: descending 32-byte block copy for the remaining offset case
 * (reached by fall-through when t1 was neither 1 nor 2).
 * Every destination word is (h | v << 24), where v is the next lower
 * aligned source word and h is the previously loaded word shifted right
 * by 8.  h is carried from one word to the next, and across loop passes
 * via the delay slot of the closing branch.
 * as (r8) and d (r5) step down by 32 per pass; the loop ends when n (r4)
 * reaches exactly 0.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) d_block_u3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) bsrli r11, r11, 8 /* h = h >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) d_bu3_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437) addi r8, r8, -32 /* as = as - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) addi r5, r5, -32 /* d = d - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) lwi r12, r8, 28 /* v = *(as + 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) swi r9, r5, 28 /* *(d + 28) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443) bsrli r11, r12, 8 /* h = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) lwi r12, r8, 24 /* v = *(as + 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) swi r9, r5, 24 /* *(d + 24) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) bsrli r11, r12, 8 /* h = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449) lwi r12, r8, 20 /* v = *(as + 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) swi r9, r5, 20 /* *(d + 20) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) bsrli r11, r12, 8 /* h = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) lwi r12, r8, 16 /* v = *(as + 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) swi r9, r5, 16 /* *(d + 16) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) bsrli r11, r12, 8 /* h = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) lwi r12, r8, 12 /* v = *(as + 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) swi r9, r5, 12 /* *(d + 12) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) bsrli r11, r12, 8 /* h = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) lwi r12, r8, 8 /* v = *(as + 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) swi r9, r5, 8 /* *(d + 8) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) bsrli r11, r12, 8 /* h = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469) lwi r12, r8, 4 /* v = *(as + 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) swi r9, r5, 4 /* *(d + 4) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) bsrli r11, r12, 8 /* h = v >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) lwi r12, r8, 0 /* v = *(as + 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477) swi r9, r5, 0 /* *(d + 0) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) addi r4, r4, -32 /* n = n - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) bneid r4, d_bu3_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) bsrli r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481) bri d_block_done /* n == 0: block phase finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482)
/*
 * d_block_u1: descending 32-byte block copy for a source offset of 1 byte
 * (t1 was 1).
 * Every destination word is (h | v << 8), where v is the next lower
 * aligned source word and h is the previously loaded word shifted right
 * by 24.  h is carried from one word to the next, and across loop passes
 * via the delay slot of the closing branch.
 * as (r8) and d (r5) step down by 32 per pass; the loop ends when n (r4)
 * reaches exactly 0.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) d_block_u1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) bsrli r11, r11, 24 /* h = h >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485) d_bu1_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) addi r8, r8, -32 /* as = as - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) addi r5, r5, -32 /* d = d - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) lwi r12, r8, 28 /* v = *(as + 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) swi r9, r5, 28 /* *(d + 28) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) bsrli r11, r12, 24 /* h = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493) lwi r12, r8, 24 /* v = *(as + 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) swi r9, r5, 24 /* *(d + 24) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) bsrli r11, r12, 24 /* h = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) lwi r12, r8, 20 /* v = *(as + 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) swi r9, r5, 20 /* *(d + 20) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) bsrli r11, r12, 24 /* h = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503) lwi r12, r8, 16 /* v = *(as + 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) swi r9, r5, 16 /* *(d + 16) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) bsrli r11, r12, 24 /* h = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) lwi r12, r8, 12 /* v = *(as + 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) swi r9, r5, 12 /* *(d + 12) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) bsrli r11, r12, 24 /* h = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) lwi r12, r8, 8 /* v = *(as + 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) swi r9, r5, 8 /* *(d + 8) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) bsrli r11, r12, 24 /* h = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) lwi r12, r8, 4 /* v = *(as + 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) swi r9, r5, 4 /* *(d + 4) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522) bsrli r11, r12, 24 /* h = v >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) lwi r12, r8, 0 /* v = *(as + 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526) swi r9, r5, 0 /* *(d + 0) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) addi r4, r4, -32 /* n = n - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) bneid r4, d_bu1_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) bsrli r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) bri d_block_done /* n == 0: block phase finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531)
/*
 * d_block_u2: descending 32-byte block copy for a source offset of 2 bytes
 * (t1 was 2).
 * Every destination word is (h | v << 16), where v is the next lower
 * aligned source word and h is the previously loaded word shifted right
 * by 16.  h is carried from one word to the next, and across loop passes
 * via the delay slot of the closing branch.
 * as (r8) and d (r5) step down by 32 per pass; the loop ends when n (r4)
 * reaches exactly 0, after which execution falls through to d_block_done
 * (there is no explicit branch at the end of this variant).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) d_block_u2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533) bsrli r11, r11, 16 /* h = h >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) d_bu2_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) addi r8, r8, -32 /* as = as - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) addi r5, r5, -32 /* d = d - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) lwi r12, r8, 28 /* v = *(as + 28) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) swi r9, r5, 28 /* *(d + 28) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) bsrli r11, r12, 16 /* h = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) lwi r12, r8, 24 /* v = *(as + 24) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) swi r9, r5, 24 /* *(d + 24) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) bsrli r11, r12, 16 /* h = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) lwi r12, r8, 20 /* v = *(as + 20) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) swi r9, r5, 20 /* *(d + 20) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) bsrli r11, r12, 16 /* h = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) lwi r12, r8, 16 /* v = *(as + 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) swi r9, r5, 16 /* *(d + 16) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) bsrli r11, r12, 16 /* h = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) lwi r12, r8, 12 /* v = *(as + 12) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) swi r9, r5, 12 /* *(d + 12) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) bsrli r11, r12, 16 /* h = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) lwi r12, r8, 8 /* v = *(as + 8) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) swi r9, r5, 8 /* *(d + 8) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) bsrli r11, r12, 16 /* h = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567) lwi r12, r8, 4 /* v = *(as + 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) swi r9, r5, 4 /* *(d + 4) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571) bsrli r11, r12, 16 /* h = v >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) lwi r12, r8, 0 /* v = *(as + 0) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) swi r9, r5, 0 /* *(d + 0) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576) addi r4, r4, -32 /* n = n - 32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) bneid r4, d_bu2_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) bsrli r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579)
/*
 * d_block_done: the 32-byte block phase is finished.  If fewer than 4
 * bytes remain in c (r7), skip the word phase and go straight to the
 * byte-at-a-time tail; otherwise fall through to d_word_xfer.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) d_block_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) addi r4, r0, 4 /* n = 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) cmpu r4, r4, r7 /* n = c - n (unsigned compare: n < 0 iff c < 4) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) blti r4,d_xfer_end /* if n < 0, less than one word to transfer */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584)
/*
 * d_word_xfer: set-up for the descending word-at-a-time phase.
 * n (r4) becomes the number of whole-word bytes in c (r7).  d (r5) and
 * s (r6) are moved down by n in one step so the loops below can address
 * words as base + n while counting n down to 0; c keeps only the 0-3
 * leftover bytes for the byte tail.
 * Branches to d_word_unaligned when the adjusted source is not word
 * aligned, otherwise falls through to d_word_aligned.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) d_word_xfer:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) andi r4, r7, 0xfffffffc /* n = c & ~3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) rsub r5, r4, r5 /* d = d - n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) rsub r6, r4, r6 /* s = s - n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) rsub r7, r4, r7 /* c = c - n */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) andi r9, r6, 3 /* t1 = s & 3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592) /* if t1 != 0, unaligned transfers needed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) bnei r9, d_word_unaligned
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594)
/*
 * d_word_aligned: descending word copy for a word-aligned source.
 * n (r4) is used as a byte index from the already lowered s (r6) and
 * d (r5); it steps down by 4 and the loop ends once the word at index 0
 * has been stored (the store sits in the branch delay slot, so it also
 * runs on the final pass).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) d_word_aligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) addi r4, r4,-4 /* n = n - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) lw r9, r6, r4 /* t1 = *(s+n) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) bneid r4, d_word_aligned /* loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) sw r9, r5, r4 /* *(d+n) = t1 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) bri d_word_done /* n == 0: word phase finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602)
/*
 * d_word_unaligned: set-up and dispatch for the descending word copy when
 * the source is not word aligned (t1 = s & 3 is non-zero).
 * as (r8) is the source rounded down to a word boundary; h (r11) is primed
 * with the aligned word at as + n, the one just above the first word the
 * loops will load.
 * Falls through to d_word_u3 when t1 is neither 1 nor 2.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) d_word_unaligned:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) andi r8, r6, 0xfffffffc /* as = s & ~3 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) lw r11, r8, r4 /* h = *(as + n) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) addi r9, r9, -1 /* t1 = t1 - 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) beqi r9,d_word_u1 /* t1 was 1 => 1 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609) addi r9, r9, -1 /* t1 = t1 - 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) beqi r9,d_word_u2 /* t1 was 2 => 2 byte offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611)
/*
 * d_word_u3: descending word copy for the remaining offset case (t1 was 3,
 * since it is s & 3, non-zero, and neither 1 nor 2).
 * Each destination word is (h | v << 24), with v the aligned source word
 * at as + n and h the previously loaded word shifted right by 8; h is
 * refreshed in the branch delay slot on every pass.
 * n (r4) steps down by 4 and the loop ends when it reaches 0.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) d_word_u3:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) bsrli r11, r11, 8 /* h = h >> 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) d_wu3_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) addi r4, r4,-4 /* n = n - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) lw r12, r8, r4 /* v = *(as + n) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) bslli r9, r12, 24 /* t1 = v << 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) sw r9, r5, r4 /* *(d + n) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) bneid r4, d_wu3_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621) bsrli r11, r12, 8 /* h = v >> 8 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) bri d_word_done /* n == 0: word phase finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624)
/*
 * d_word_u1: descending word copy for a source offset of 1 byte
 * (t1 was 1).
 * Each destination word is (h | v << 8), with v the aligned source word
 * at as + n and h the previously loaded word shifted right by 24; h is
 * refreshed in the branch delay slot on every pass.
 * n (r4) steps down by 4 and the loop ends when it reaches 0.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) d_word_u1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) bsrli r11, r11, 24 /* h = h >> 24 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627) d_wu1_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) addi r4, r4,-4 /* n = n - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) lw r12, r8, r4 /* v = *(as + n) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) bslli r9, r12, 8 /* t1 = v << 8 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) sw r9, r5, r4 /* *(d + n) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633) bneid r4, d_wu1_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) bsrli r11, r12, 24 /* h = v >> 24 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) bri d_word_done /* n == 0: word phase finished */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
/*
 * d_word_u2: descending word copy for a source offset of 2 bytes
 * (t1 was 2).
 * Each destination word is (h | v << 16), with v the aligned source word
 * at as + n and h the previously loaded word shifted right by 16; h is
 * refreshed in the branch delay slot on every pass.
 * n (r4) steps down by 4; when it reaches 0 execution falls through to
 * d_word_done (there is no explicit branch at the end of this variant).
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) d_word_u2:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) bsrli r11, r11, 16 /* h = h >> 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) d_wu2_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) addi r4, r4,-4 /* n = n - 4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 642) lw r12, r8, r4 /* v = *(as + n) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 643) bslli r9, r12, 16 /* t1 = v << 16 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 644) or r9, r11, r9 /* t1 = h | t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 645) sw r9, r5, r4 /* *(d + n) = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 646) bneid r4, d_wu2_loop /* while (n) loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 647) bsrli r11, r12, 16 /* h = v >> 16 (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 648)
/*
 * d_word_done / d_xfer_end: byte-at-a-time tail of the descending copy,
 * followed by the routine's return.
 * While c (r7) is non-zero, step s (r6) and d (r5) down by one and copy a
 * single byte; c is decremented in the delay slot of the loop branch.
 * When c reaches 0 the loop leaves through d_done, the local return right
 * below, so this tail does not depend on any label outside this section.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 649) d_word_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 650)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 651) d_xfer_end:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 652) d_xfer_end_loop:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 653) beqi r7, d_done /* while (c) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 654) addi r6, r6, -1 /* s-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 655) lbui r9, r6, 0 /* t1 = *s */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 656) addi r5, r5, -1 /* d-- */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 657) sbi r9, r5, 0 /* *d = t1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 658) brid d_xfer_end_loop /* loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 659) addi r7, r7, -1 /* c-- (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 660)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 661) d_done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 662) rtsd r15, 8 /* return to caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 663) nop /* (IN DELAY SLOT) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 664)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 665) .size memmove, . - memmove
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 666) .end memmove