^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) IBM Corporation, 2012
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Author: Anton Blanchard <anton@au.ibm.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <asm/ppc_asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #ifndef SELFTEST_CASE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) /* Default when not predefined: 0 == don't use VMX, 1 == use VMX; the value is assigned to test_feature in memcpy_power7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) #define SELFTEST_CASE 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) #ifdef __BIG_ENDIAN__ /* endian-specific wrappers used by the unaligned VMX copy path */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) #define LVS(VRT,RA,RB) lvsl VRT,RA,RB /* permute control vector built with lvsl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRA,VRB,VRC /* source vectors passed in the order given */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) #else /* not big endian */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) #define LVS(VRT,RA,RB) lvsr VRT,RA,RB /* lvsr is used in place of lvsl */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) #define VPERM(VRT,VRA,VRB,VRC) vperm VRT,VRB,VRA,VRC /* VRA and VRB are swapped relative to the big-endian form */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #endif /* __BIG_ENDIAN__ */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) _GLOBAL(memcpy_power7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) cmpldi r5,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) cmpldi cr1,r5,4096
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) std r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) blt .Lshort_copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) #ifdef CONFIG_ALTIVEC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) test_feature = SELFTEST_CASE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) BEGIN_FTR_SECTION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) bgt cr1, .Lvmx_copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) .Lnonvmx_copy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) /* Get the source 8B aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) neg r6,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) mtocrf 0x01,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) clrldi r6,r6,(64-3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) bf cr7*4+3,1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) lbz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) addi r4,r4,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) stb r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) addi r3,r3,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) 1: bf cr7*4+2,2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) lhz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) addi r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) sth r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) addi r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) 2: bf cr7*4+1,3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) lwz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) addi r4,r4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) addi r3,r3,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) 3: sub r5,r5,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) cmpldi r5,128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) blt 5f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) mflr r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) stdu r1,-STACKFRAMESIZE(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) std r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) std r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) std r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) std r17,STK_REG(R17)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) std r18,STK_REG(R18)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) std r19,STK_REG(R19)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) std r20,STK_REG(R20)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) std r21,STK_REG(R21)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) std r22,STK_REG(R22)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) std r0,STACKFRAMESIZE+16(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) srdi r6,r5,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) mtctr r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) /* Now do cacheline (128B) sized loads and stores. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) 4:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) ld r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) ld r6,8(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) ld r7,16(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) ld r8,24(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) ld r9,32(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) ld r10,40(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) ld r11,48(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) ld r12,56(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) ld r14,64(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) ld r15,72(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) ld r16,80(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) ld r17,88(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) ld r18,96(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) ld r19,104(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) ld r20,112(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) ld r21,120(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) addi r4,r4,128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) std r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) std r6,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) std r7,16(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) std r8,24(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) std r9,32(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) std r10,40(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) std r11,48(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) std r12,56(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108) std r14,64(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) std r15,72(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) std r16,80(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) std r17,88(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) std r18,96(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) std r19,104(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) std r20,112(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) std r21,120(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) addi r3,r3,128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) bdnz 4b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) clrldi r5,r5,(64-7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) ld r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) ld r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) ld r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) ld r17,STK_REG(R17)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) ld r18,STK_REG(R18)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) ld r19,STK_REG(R19)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) ld r20,STK_REG(R20)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) ld r21,STK_REG(R21)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) ld r22,STK_REG(R22)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) addi r1,r1,STACKFRAMESIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) /* Up to 127B to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) 5: srdi r6,r5,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) mtocrf 0x01,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) 6: bf cr7*4+1,7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) ld r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) ld r6,8(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) ld r7,16(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) ld r8,24(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) ld r9,32(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) ld r10,40(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) ld r11,48(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) ld r12,56(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) addi r4,r4,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) std r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) std r6,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) std r7,16(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) std r8,24(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) std r9,32(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) std r10,40(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) std r11,48(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) std r12,56(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154) addi r3,r3,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) /* Up to 63B to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) 7: bf cr7*4+2,8f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) ld r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159) ld r6,8(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) ld r7,16(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) ld r8,24(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) addi r4,r4,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) std r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) std r6,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) std r7,16(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) std r8,24(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167) addi r3,r3,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) /* Up to 31B to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) 8: bf cr7*4+3,9f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171) ld r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) ld r6,8(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) addi r4,r4,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) std r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) std r6,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) addi r3,r3,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) 9: clrldi r5,r5,(64-4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) /* Up to 15B to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) .Lshort_copy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) mtocrf 0x01,r5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) bf cr7*4+0,12f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) lwz r0,0(r4) /* Less chance of a reject with word ops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) lwz r6,4(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) addi r4,r4,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) stw r6,4(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) addi r3,r3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) 12: bf cr7*4+1,13f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) lwz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) addi r4,r4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) addi r3,r3,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) 13: bf cr7*4+2,14f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) lhz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) addi r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) sth r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) addi r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) 14: bf cr7*4+3,15f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) lbz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205) stb r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) 15: ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) .Lunwind_stack_nonvmx_copy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) addi r1,r1,STACKFRAMESIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) b .Lnonvmx_copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) .Lvmx_copy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) #ifdef CONFIG_ALTIVEC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) mflr r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) std r4,-STACKFRAMESIZE+STK_REG(R30)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) std r5,-STACKFRAMESIZE+STK_REG(R29)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) std r0,16(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) stdu r1,-STACKFRAMESIZE(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) bl enter_vmx_ops
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222) cmpwi cr1,r3,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) ld r0,STACKFRAMESIZE+16(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) ld r3,STK_REG(R31)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) ld r4,STK_REG(R30)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) ld r5,STK_REG(R29)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) mtlr r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) * We prefetch both the source and destination using enhanced touch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) * instructions. We use a stream ID of 0 for the load side and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) * 1 for the store side.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) clrrdi r6,r4,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) clrrdi r9,r3,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) ori r9,r9,1 /* stream=1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) srdi r7,r5,7 /* length in cachelines, capped at 0x3FF */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239) cmpldi r7,0x3FF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) ble 1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) li r7,0x3FF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) 1: lis r0,0x0E00 /* depth=7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) sldi r7,r7,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) or r7,r7,r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) ori r10,r7,1 /* stream=1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) lis r8,0x8000 /* GO=1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) clrldi r8,r8,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) dcbt 0,r6,0b01000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) dcbt 0,r7,0b01010
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) dcbtst 0,r9,0b01000
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) dcbtst 0,r10,0b01010
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) eieio
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) dcbt 0,r8,0b01010 /* GO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) beq cr1,.Lunwind_stack_nonvmx_copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) * If source and destination are not relatively aligned we use a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) * slower permute loop.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) xor r6,r4,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) rldicl. r6,r6,0,(64-4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) bne .Lvmx_unaligned_copy
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) /* Get the destination 16B aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) neg r6,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) mtocrf 0x01,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) clrldi r6,r6,(64-4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) bf cr7*4+3,1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) lbz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) addi r4,r4,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) stb r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) addi r3,r3,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) 1: bf cr7*4+2,2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279) lhz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) addi r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) sth r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) addi r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) 2: bf cr7*4+1,3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) lwz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) addi r4,r4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) addi r3,r3,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) 3: bf cr7*4+0,4f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) ld r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) addi r4,r4,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) std r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) addi r3,r3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) 4: sub r5,r5,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) /* Get the destination 128B aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) neg r6,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) srdi r7,r6,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) mtocrf 0x01,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) clrldi r6,r6,(64-7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) li r9,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) li r10,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) li r11,48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) bf cr7*4+3,5f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) lvx v1,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) addi r4,r4,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) stvx v1,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) addi r3,r3,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) 5: bf cr7*4+2,6f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) lvx v1,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) lvx v0,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) addi r4,r4,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) stvx v1,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) stvx v0,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) addi r3,r3,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) 6: bf cr7*4+1,7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) lvx v3,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) lvx v2,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) lvx v1,r4,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) lvx v0,r4,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) addi r4,r4,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) stvx v3,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) stvx v2,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) stvx v1,r3,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) stvx v0,r3,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) addi r3,r3,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 334) 7: sub r5,r5,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 335) srdi r6,r5,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 336)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 337) std r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 338) std r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 339) std r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 340)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 341) li r12,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 342) li r14,80
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 343) li r15,96
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 344) li r16,112
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 345)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 346) mtctr r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 347)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 348) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 349) * Now do cacheline sized loads and stores. By this stage the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 350) * cacheline stores are also cacheline aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 351) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 352) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 353) 8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 354) lvx v7,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 355) lvx v6,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 356) lvx v5,r4,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 357) lvx v4,r4,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 358) lvx v3,r4,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 359) lvx v2,r4,r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 360) lvx v1,r4,r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 361) lvx v0,r4,r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 362) addi r4,r4,128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 363) stvx v7,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 364) stvx v6,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 365) stvx v5,r3,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 366) stvx v4,r3,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 367) stvx v3,r3,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 368) stvx v2,r3,r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 369) stvx v1,r3,r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 370) stvx v0,r3,r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 371) addi r3,r3,128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 372) bdnz 8b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 374) ld r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 375) ld r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 376) ld r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 377)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 378) /* Up to 127B to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 379) clrldi r5,r5,(64-7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 380) srdi r6,r5,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 381) mtocrf 0x01,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 382)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 383) bf cr7*4+1,9f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 384) lvx v3,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 385) lvx v2,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 386) lvx v1,r4,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 387) lvx v0,r4,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 388) addi r4,r4,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 389) stvx v3,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 390) stvx v2,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 391) stvx v1,r3,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 392) stvx v0,r3,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 393) addi r3,r3,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 394)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 395) 9: bf cr7*4+2,10f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 396) lvx v1,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 397) lvx v0,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 398) addi r4,r4,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 399) stvx v1,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 400) stvx v0,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 401) addi r3,r3,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 402)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 403) 10: bf cr7*4+3,11f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 404) lvx v1,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 405) addi r4,r4,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 406) stvx v1,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 407) addi r3,r3,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 408)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 409) /* Up to 15B to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 410) 11: clrldi r5,r5,(64-4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 411) mtocrf 0x01,r5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 412) bf cr7*4+0,12f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 413) ld r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 414) addi r4,r4,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 415) std r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 416) addi r3,r3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 417)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 418) 12: bf cr7*4+1,13f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 419) lwz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 420) addi r4,r4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 421) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 422) addi r3,r3,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 424) 13: bf cr7*4+2,14f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 425) lhz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 426) addi r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 427) sth r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 428) addi r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 429)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 430) 14: bf cr7*4+3,15f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 431) lbz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 432) stb r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 434) 15: addi r1,r1,STACKFRAMESIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 435) ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 436) b exit_vmx_ops /* tail call optimise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 438) .Lvmx_unaligned_copy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 439) /* Get the destination 16B aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 440) neg r6,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 441) mtocrf 0x01,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 442) clrldi r6,r6,(64-4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 443)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 444) bf cr7*4+3,1f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 445) lbz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 446) addi r4,r4,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 447) stb r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 448) addi r3,r3,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 450) 1: bf cr7*4+2,2f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 451) lhz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 452) addi r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 453) sth r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 454) addi r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 455)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 456) 2: bf cr7*4+1,3f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 457) lwz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 458) addi r4,r4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 459) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 460) addi r3,r3,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 462) 3: bf cr7*4+0,4f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 463) lwz r0,0(r4) /* Less chance of a reject with word ops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 464) lwz r7,4(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 465) addi r4,r4,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 466) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 467) stw r7,4(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 468) addi r3,r3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 469)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 470) 4: sub r5,r5,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 471)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 472) /* Get the destination 128B aligned */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 473) neg r6,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 474) srdi r7,r6,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 475) mtocrf 0x01,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 476) clrldi r6,r6,(64-7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 477)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 478) li r9,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 479) li r10,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 480) li r11,48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 481)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 482) LVS(v16,0,r4) /* Setup permute control vector */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 483) lvx v0,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 484) addi r4,r4,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 485)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 486) bf cr7*4+3,5f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 487) lvx v1,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 488) VPERM(v8,v0,v1,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 489) addi r4,r4,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 490) stvx v8,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 491) addi r3,r3,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 492) vor v0,v1,v1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 493)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 494) 5: bf cr7*4+2,6f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 495) lvx v1,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 496) VPERM(v8,v0,v1,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 497) lvx v0,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 498) VPERM(v9,v1,v0,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 499) addi r4,r4,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 500) stvx v8,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 501) stvx v9,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 502) addi r3,r3,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 503)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 504) 6: bf cr7*4+1,7f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 505) lvx v3,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 506) VPERM(v8,v0,v3,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 507) lvx v2,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 508) VPERM(v9,v3,v2,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 509) lvx v1,r4,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 510) VPERM(v10,v2,v1,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 511) lvx v0,r4,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 512) VPERM(v11,v1,v0,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 513) addi r4,r4,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 514) stvx v8,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 515) stvx v9,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 516) stvx v10,r3,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 517) stvx v11,r3,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 518) addi r3,r3,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 520) 7: sub r5,r5,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 521) srdi r6,r5,7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 522)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 523) std r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 524) std r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 525) std r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 526)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 527) li r12,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 528) li r14,80
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 529) li r15,96
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 530) li r16,112
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 531)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 532) mtctr r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 533)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 534) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 535) * Now do cacheline sized loads and stores. By this stage the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 536) * cacheline stores are also cacheline aligned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 537) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 538) .align 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 539) 8:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 540) lvx v7,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 541) VPERM(v8,v0,v7,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 542) lvx v6,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 543) VPERM(v9,v7,v6,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 544) lvx v5,r4,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 545) VPERM(v10,v6,v5,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 546) lvx v4,r4,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 547) VPERM(v11,v5,v4,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 548) lvx v3,r4,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 549) VPERM(v12,v4,v3,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 550) lvx v2,r4,r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 551) VPERM(v13,v3,v2,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 552) lvx v1,r4,r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 553) VPERM(v14,v2,v1,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 554) lvx v0,r4,r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 555) VPERM(v15,v1,v0,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 556) addi r4,r4,128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 557) stvx v8,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 558) stvx v9,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 559) stvx v10,r3,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 560) stvx v11,r3,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 561) stvx v12,r3,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 562) stvx v13,r3,r14
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 563) stvx v14,r3,r15
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 564) stvx v15,r3,r16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 565) addi r3,r3,128
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 566) bdnz 8b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 567)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 568) ld r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 569) ld r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 570) ld r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 571)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 572) /* Up to 127B to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 573) clrldi r5,r5,(64-7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 574) srdi r6,r5,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 575) mtocrf 0x01,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 576)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 577) bf cr7*4+1,9f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 578) lvx v3,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 579) VPERM(v8,v0,v3,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 580) lvx v2,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 581) VPERM(v9,v3,v2,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 582) lvx v1,r4,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 583) VPERM(v10,v2,v1,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 584) lvx v0,r4,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 585) VPERM(v11,v1,v0,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 586) addi r4,r4,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 587) stvx v8,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 588) stvx v9,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 589) stvx v10,r3,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 590) stvx v11,r3,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 591) addi r3,r3,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 592)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 593) 9: bf cr7*4+2,10f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 594) lvx v1,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 595) VPERM(v8,v0,v1,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 596) lvx v0,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 597) VPERM(v9,v1,v0,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 598) addi r4,r4,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 599) stvx v8,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 600) stvx v9,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 601) addi r3,r3,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 602)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 603) 10: bf cr7*4+3,11f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 604) lvx v1,0,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 605) VPERM(v8,v0,v1,v16)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 606) addi r4,r4,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 607) stvx v8,0,r3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 608) addi r3,r3,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 609)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 610) /* Up to 15B to go */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 611) 11: clrldi r5,r5,(64-4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 612) addi r4,r4,-16 /* Unwind the +16 load offset */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 613) mtocrf 0x01,r5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 614) bf cr7*4+0,12f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 615) lwz r0,0(r4) /* Less chance of a reject with word ops */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 616) lwz r6,4(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 617) addi r4,r4,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 618) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 619) stw r6,4(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 620) addi r3,r3,8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 621)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 622) 12: bf cr7*4+1,13f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 623) lwz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 624) addi r4,r4,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 625) stw r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 626) addi r3,r3,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 627)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 628) 13: bf cr7*4+2,14f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 629) lhz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 630) addi r4,r4,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 631) sth r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 632) addi r3,r3,2
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 633)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 634) 14: bf cr7*4+3,15f
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 635) lbz r0,0(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 636) stb r0,0(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 638) 15: addi r1,r1,STACKFRAMESIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 639) ld r3,-STACKFRAMESIZE+STK_REG(R31)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 640) b exit_vmx_ops /* tail call optimise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 641) #endif /* CONFIG_ALTIVEC */