^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0-or-later */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) * Copyright (C) IBM Corporation, 2012
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) * Author: Anton Blanchard <anton@au.ibm.com>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <asm/page.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/ppc_asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
/*
 * copypage_power7: copy one page (PAGE_SIZE bytes) from the address in r4
 * to the address in r3, 128 bytes per loop iteration.
 *
 * In: r3 = destination ("to"), r4 = source ("from"). The prefetch setup
 * below relies on both addresses being page aligned.
 *
 * Two copy loops exist. With CONFIG_ALTIVEC, a VMX loop is used when
 * enter_vmx_ops returns non-zero; it finishes by tail-calling
 * exit_vmx_ops. Otherwise (or when enter_vmx_ops returns 0) a loop
 * using 16 general purpose registers is used; it saves and restores
 * r14-r20 in a STACKFRAMESIZE stack frame and returns with blr.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) _GLOBAL(copypage_power7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) * We prefetch both the source and destination using enhanced touch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * instructions. We use a stream ID of 0 for the load side and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * 1 for the store side. Since source and destination are page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * aligned we don't need to clear the bottom 7 bits of either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) * address.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) ori r9,r3,1 /* stream=1 => to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20)
/* r7 = length/depth descriptor for stream 0; its value depends on the page size */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) #ifdef CONFIG_PPC_64K_PAGES
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) lis r7,0x0E01 /* depth=7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * units/cachelines=512 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) lis r7,0x0E00 /* depth=7 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) ori r7,r7,0x1000 /* units/cachelines=32 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) ori r10,r7,1 /* stream=1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) lis r8,0x8000 /* GO=1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) clrldi r8,r8,32 /* clear the upper 32 bits that lis sign-extended into */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) /* setup read stream 0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) dcbt 0,r4,0b01000 /* addr from */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) dcbt 0,r7,0b01010 /* length and depth from */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) /* setup write stream 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) dcbtst 0,r9,0b01000 /* addr to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) dcbtst 0,r10,0b01010 /* length and depth to */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) eieio /* NOTE(review): presumably orders the stream descriptions ahead of GO - confirm against the ISA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) dcbt 0,r8,0b01010 /* all streams GO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) #ifdef CONFIG_ALTIVEC
/*
 * VMX path. r3, r4 and LR are preserved around the call to
 * enter_vmx_ops. r3/r4 are stored at negative offsets from r1 so that
 * they land in the STK_REG(R31)/STK_REG(R30) slots of the frame that
 * the stdu below creates; LR goes to offset 16 of the frame r1 points
 * at on entry.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) mflr r0 /* the bl below overwrites LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) std r3,-STACKFRAMESIZE+STK_REG(R31)(r1) /* save destination */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) std r4,-STACKFRAMESIZE+STK_REG(R30)(r1) /* save source */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) std r0,16(r1) /* save LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) stdu r1,-STACKFRAMESIZE(r1) /* allocate the stack frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) bl enter_vmx_ops /* result in r3: 0 selects the non-VMX loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) cmpwi r3,0 /* result is consumed by the beq below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) ld r0,STACKFRAMESIZE+16(r1) /* reload saved LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) ld r3,STK_REG(R31)(r1) /* reload destination */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) ld r4,STK_REG(R30)(r1) /* reload source */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) mtlr r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) li r0,(PAGE_SIZE/128) /* number of 128-byte chunks in a page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) mtctr r0 /* CTR drives whichever copy loop runs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) beq .Lnonvmx_copy /* enter_vmx_ops returned 0; the frame stays allocated for the GPR loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) addi r1,r1,STACKFRAMESIZE /* the VMX loop uses no stack: pop the frame now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
/* r6-r12 = byte offsets 16..112 used as index registers by lvx/stvx */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) li r6,16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) li r7,32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) li r8,48
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) li r9,64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) li r10,80
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) li r11,96
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) li r12,112
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) .align 5 /* NOTE(review): 2^5 = 32-byte alignment of the loop entry, assuming power-of-two .align semantics */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) 1: lvx v7,0,r4 /* load eight 16-byte vectors (128 bytes) from the source */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) lvx v6,r4,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) lvx v5,r4,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) lvx v4,r4,r8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) lvx v3,r4,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) lvx v2,r4,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) lvx v1,r4,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) lvx v0,r4,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) addi r4,r4,128 /* advance source */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) stvx v7,0,r3 /* store the same eight vectors to the destination */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) stvx v6,r3,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) stvx v5,r3,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) stvx v4,r3,r8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) stvx v3,r3,r9
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) stvx v2,r3,r10
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) stvx v1,r3,r11
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) stvx v0,r3,r12
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) addi r3,r3,128 /* advance destination */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) bdnz 1b /* repeat for all PAGE_SIZE/128 chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90)
/* LR was restored above, so exit_vmx_ops returns directly to our caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91) b exit_vmx_ops /* tail call optimise */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) #else
/* No VMX support built in: set up the loop count and the frame for the GPR loop */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) li r0,(PAGE_SIZE/128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) mtctr r0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) stdu r1,-STACKFRAMESIZE(r1) /* frame holds the r14-r20 save slots used below */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99)
/*
 * GPR copy loop. Reached with a STACKFRAMESIZE frame already allocated
 * (by the stdu on either side of the conditional above) and with
 * CTR = PAGE_SIZE/128. r14-r20 are used as extra data registers, so
 * their incoming values are saved here and restored before returning.
 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) .Lnonvmx_copy:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) std r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) std r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) std r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) std r17,STK_REG(R17)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) std r18,STK_REG(R18)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) std r19,STK_REG(R19)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) std r20,STK_REG(R20)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) 1: ld r0,0(r4) /* load 16 doublewords (128 bytes) from the source */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110) ld r5,8(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) ld r6,16(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) ld r7,24(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) ld r8,32(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114) ld r9,40(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) ld r10,48(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) ld r11,56(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) ld r12,64(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) ld r14,72(r4) /* r13 is skipped - NOTE(review): presumably reserved, confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) ld r15,80(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) ld r16,88(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) ld r17,96(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) ld r18,104(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) ld r19,112(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) ld r20,120(r4)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) addi r4,r4,128 /* advance source */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) std r0,0(r3) /* store the same 16 doublewords to the destination */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) std r5,8(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128) std r6,16(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) std r7,24(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) std r8,32(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) std r9,40(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) std r10,48(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) std r11,56(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) std r12,64(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) std r14,72(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) std r15,80(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137) std r16,88(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) std r17,96(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139) std r18,104(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140) std r19,112(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) std r20,120(r3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) addi r3,r3,128 /* advance destination */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) bdnz 1b /* repeat for all PAGE_SIZE/128 chunks */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144)
/* restore the registers saved at .Lnonvmx_copy, pop the frame and return */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) ld r14,STK_REG(R14)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) ld r15,STK_REG(R15)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) ld r16,STK_REG(R16)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) ld r17,STK_REG(R17)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) ld r18,STK_REG(R18)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) ld r19,STK_REG(R19)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) ld r20,STK_REG(R20)(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) addi r1,r1,STACKFRAMESIZE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) blr