^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) /* SPDX-License-Identifier: GPL-2.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) #include <asm/processor.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) #include <asm/ppc_asm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) #include <asm/reg.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) #include <asm/asm-offsets.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) #include <asm/cputable.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7) #include <asm/thread_info.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <asm/page.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/ptrace.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10) #include <asm/export.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #include <asm/asm-compat.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * Load state from memory into VMX registers including VSCR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * Assumes the caller has enabled VMX in the MSR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) _GLOBAL(load_vr_state) /* r3 = base address of the saved VR state to load */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) li r4,VRSTATE_VSCR /* r4 = offset of the VSCR image within that state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) lvx v0,r4,r3 /* v0 = VSCR image at r3 + r4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) mtvscr v0 /* VSCR is set first, using v0 as a staging register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) REST_32VRS(0,r4,r3) /* macro defined elsewhere; presumably reloads v0-v31 from base r3 with r4 as scratch, replacing the staged v0 - TODO confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) EXPORT_SYMBOL(load_vr_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) _ASM_NOKPROBE_SYMBOL(load_vr_state); /* used by restore_math */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * Store VMX state into memory, including VSCR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * Assumes the caller has enabled VMX in the MSR.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) _GLOBAL(store_vr_state) /* r3 = base address of the VR state area to store into */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31) SAVE_32VRS(0, r4, r3) /* done before mfvscr because v0 is reused below to carry VSCR; macro defined elsewhere, presumably stores v0-v31 with r4 as scratch - TODO confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) mfvscr v0 /* v0 = VSCR (v0 is not restored afterwards) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) li r4, VRSTATE_VSCR /* r4 = offset of the VSCR slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) stvx v0, r4, r3 /* store the VSCR image at r3 + r4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36) EXPORT_SYMBOL(store_vr_state)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) * Load the current task's vector registers and VSCR from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) * state saved in its thread_struct; no other task's state is saved here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) * Enables the VMX for use in the kernel on return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) * On SMP we know the VMX is free, since we give it up every
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) * switch (ie, no lazy save of the vector registers).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) * Note that on 32-bit this can only use registers that will be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46) * restored by fast_exception_return, i.e. r3 - r6, r10 and r11.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) _GLOBAL(load_up_altivec) /* NOTE(review): r9 (32-bit) / r12 (64-bit) appear to hold the MSR image to return with - inferred from the oris below, confirm against the callers */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) mfmsr r5 /* grab the current MSR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) oris r5,r5,MSR_VEC@h /* set MSR_VEC in the copy */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) MTMSRD(r5) /* enable use of AltiVec now */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) isync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55) * While userspace in general ignores VRSAVE, glibc uses it as a boolean
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) * to optimise userspace context save/restore. Whenever we take an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57) * altivec unavailable exception we must set VRSAVE to something non
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) * zero. Set it to all 1s. See also the programming note in the ISA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) mfspr r4,SPRN_VRSAVE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61) cmpwi 0,r4,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) bne+ 1f /* VRSAVE already non-zero: leave it as is */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) li r4,-1 /* all 1s */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64) mtspr SPRN_VRSAVE,r4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) 1:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) /* enable use of VMX after return */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) #ifdef CONFIG_PPC32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68) mfspr r5,SPRN_SPRG_THREAD /* current task's THREAD (phys) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) oris r9,r9,MSR_VEC@h /* set MSR_VEC in r9; r9 is only updated in place here, nothing is stored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) #ifdef CONFIG_VMAP_STACK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) tovirt(r5, r5) /* convert the THREAD pointer in r5 from physical to virtual */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74) ld r4,PACACURRENT(r13) /* r4 = current task pointer read at PACACURRENT(r13) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) addi r5,r4,THREAD /* Get THREAD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) oris r12,r12,MSR_VEC@h /* set MSR_VEC in r12 ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77) std r12,_MSR(r1) /* ... and write the updated value to _MSR(r1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) li r4,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) stb r4,THREAD_LOAD_VEC(r5) /* byte at THREAD_LOAD_VEC = 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81) addi r6,r5,THREAD_VRSTATE /* r6 = address of the VR state inside THREAD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) li r4,1 /* r4 still holds 1 from the load above; this reload is redundant but harmless */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 83) li r10,VRSTATE_VSCR /* r10 = offset of the VSCR image within the VR state */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 84) stw r4,THREAD_USED_VR(r5) /* word at THREAD_USED_VR = 1 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 85) lvx v0,r10,r6 /* v0 = saved VSCR image */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 86) mtvscr v0 /* VSCR is set first, using v0 as a staging register */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 87) REST_32VRS(0,r4,r6) /* macro defined elsewhere; presumably reloads v0-v31 from base r6 with r4 as scratch - TODO confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 88) /* restore registers and return */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 89) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 90) _ASM_NOKPROBE_SYMBOL(load_up_altivec)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 91)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 92) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 93) * save_altivec(tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 94) * Save the vector registers to its thread_struct
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 95) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 96) _GLOBAL(save_altivec) /* r3 = tsk on entry */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 97) addi r3,r3,THREAD /* want THREAD of task */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 98) PPC_LL r7,THREAD_VRSAVEAREA(r3) /* r7 = pointer stored at THREAD_VRSAVEAREA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 99) PPC_LL r5,PT_REGS(r3) /* NOTE(review): r5 is not referenced again in the visible code of this routine */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 100) PPC_LCMPI 0,r7,0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 101) bne 2f /* non-zero pointer: save into that area */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 102) addi r7,r3,THREAD_VRSTATE /* zero pointer: fall back to the VR state inside THREAD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 103) 2: SAVE_32VRS(0,r4,r7) /* done before mfvscr because v0 is reused below; macro defined elsewhere, presumably stores v0-v31 with r4 as scratch - TODO confirm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 104) mfvscr v0 /* v0 = VSCR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 105) li r4,VRSTATE_VSCR /* r4 = offset of the VSCR slot */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 106) stvx v0,r4,r7 /* store the VSCR image at r7 + r4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 107) blr
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 108)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 109) #ifdef CONFIG_VSX
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 110)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 111) #ifdef CONFIG_PPC32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 112) #error This asm code isn't ready for 32-bit kernels
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 113) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 114)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 115) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 116) * load_up_vsx(unused, unused, tsk)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 117) * Make VSX usable after return: call load_up_fpu and/or load_up_altivec
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 118) * when the MSR value in r12 shows MSR_FP / MSR_VEC are not yet set,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 119) * record that the current thread has used VSX, then set MSR_VSX in r12,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 120) * store it at _MSR(r1) and leave through fast_interrupt_return.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 121) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 122) _GLOBAL(load_up_vsx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 123) /* Load FP and VSX registers if they haven't been done yet */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 124) andi. r5,r12,MSR_FP /* CR0 'eq' is set when MSR_FP is clear in r12 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 125) beql+ load_up_fpu /* skip if already loaded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 126) andis. r5,r12,MSR_VEC@h /* CR0 'eq' is set when MSR_VEC is clear in r12 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 127) beql+ load_up_altivec /* skip if already loaded */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 128)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 129) ld r4,PACACURRENT(r13) /* r4 = current task pointer read at PACACURRENT(r13) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 130) addi r4,r4,THREAD /* Get THREAD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 131) li r6,1
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 132) stw r6,THREAD_USED_VSR(r4) /* ... also set thread used vsr */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 133) /* enable use of VSX after return */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 134) oris r12,r12,MSR_VSX@h /* set MSR_VSX in r12 ... */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 135) std r12,_MSR(r1) /* ... and write the updated value to _MSR(r1) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 136) b fast_interrupt_return /* plain branch: this routine never executes blr itself */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 137)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 138) #endif /* CONFIG_VSX */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 139)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 140)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 141) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 142) * The routines below are in assembler so we can closely control the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 143) * usage of floating-point registers. These routines must be called
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 144) * with preempt disabled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 145) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 146) #ifdef CONFIG_PPC32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 147) .data
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 148) fpzero:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 149) .long 0 /* 0.0 in single-precision FP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 150) fpone:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 151) .long 0x3f800000 /* 1.0 in single-precision FP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 152) fphalf:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 153) .long 0x3f000000 /* 0.5 in single-precision FP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 155) #define LDCONST(fr, name) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 156) lis r11,name@ha; \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 157) lfs fr,name@l(r11) /* 32-bit variant: single-precision load by absolute address; overwrites r11 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 158) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 159)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 160) .section ".toc","aw"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 161) fpzero:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 162) .tc FD_0_0[TC],0 /* 0.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 163) fpone:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 164) .tc FD_3ff00000_0[TC],0x3ff0000000000000 /* 1.0 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 165) fphalf:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 166) .tc FD_3fe00000_0[TC],0x3fe0000000000000 /* 0.5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 167)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 168) #define LDCONST(fr, name) \
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 169) lfd fr,name@toc(r2) /* 64-bit variant: double-precision load of a TOC entry addressed off r2 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 170) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 171)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 172) .text
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 173) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 174) * Internal routine to enable floating point and set FPSCR to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 175) * Don't call it from C; it doesn't use the normal calling convention.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 176) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 177) fpenable: /* the v*fp callers in this file copy their own LR into r12 before calling; this routine leaves r12 alone */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 178) #ifdef CONFIG_PPC32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 179) stwu r1,-64(r1) /* push a 64-byte stack frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 180) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 181) stdu r1,-64(r1) /* push a 64-byte stack frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 182) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 183) mfmsr r10 /* r10 = original MSR; fpdisable writes it back, so r10 must stay intact until then */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 184) ori r11,r10,MSR_FP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 185) mtmsr r11 /* turn on MSR_FP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 186) isync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 187) stfd fr0,24(r1) /* save fr0, fr1 and fr31 in the new frame; fpdisable reloads them */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 188) stfd fr1,16(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 189) stfd fr31,8(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 190) LDCONST(fr1, fpzero) /* fr1 = 0.0 (the 32-bit LDCONST overwrites r11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 191) mffs fr31 /* fr31 = FPSCR on entry; kept there until fpdisable restores it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 192) MTFSF_L(fr1) /* write fr1 (0.0) to the FPSCR; macro defined elsewhere */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 193) blr /* returns with the 64-byte frame still pushed; fpdisable pops it */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 194)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 195) fpdisable: /* counterpart of fpenable; the v*fp routines in this file reach it with a plain branch, not a call */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 196) mtlr r12 /* LR = return address the v*fp routine saved in r12 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 197) MTFSF_L(fr31) /* restore the FPSCR value fpenable captured in fr31 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 198) lfd fr31,8(r1) /* reload fr31, fr1 and fr0 from the slots fpenable filled */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 199) lfd fr1,16(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 200) lfd fr0,24(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 201) mtmsr r10 /* restore the MSR saved in r10 by fpenable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 202) isync
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 203) addi r1,r1,64 /* pop the 64-byte frame pushed by fpenable */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 204) blr /* return through the LR set from r12 above */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 205)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 206) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 207) * Vector add, floating point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 208) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 209) _GLOBAL(vaddfp) /* r3 -> destination, r4 -> first source, r5 -> second source; 4 single-precision elements each */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 210) mflr r12 /* keep our return address in r12; fpdisable moves it back to LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 211) bl fpenable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 212) li r0,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 213) mtctr r0 /* loop over 4 elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 214) li r6,0 /* r6 = byte offset of the current element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 215) 1: lfsx fr0,r4,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 216) lfsx fr1,r5,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 217) fadds fr0,fr0,fr1 /* element of r4 + element of r5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 218) stfsx fr0,r3,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 219) addi r6,r6,4 /* advance to the next 4-byte element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 220) bdnz 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 221) b fpdisable /* fpdisable restores FP state and returns to our caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 222)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 224) * Vector subtract, floating point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 225) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 226) _GLOBAL(vsubfp) /* r3 -> destination, r4 -> minuend source, r5 -> subtrahend source; 4 single-precision elements each */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 227) mflr r12 /* keep our return address in r12; fpdisable moves it back to LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 228) bl fpenable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 229) li r0,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 230) mtctr r0 /* loop over 4 elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 231) li r6,0 /* r6 = byte offset of the current element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 232) 1: lfsx fr0,r4,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 233) lfsx fr1,r5,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 234) fsubs fr0,fr0,fr1 /* element of r4 - element of r5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 235) stfsx fr0,r3,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 236) addi r6,r6,4 /* advance to the next 4-byte element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 237) bdnz 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 238) b fpdisable /* fpdisable restores FP state and returns to our caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 240) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 241) * Vector multiply and add, floating point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 242) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 243) _GLOBAL(vmaddfp) /* r3 -> destination, r4/r5/r6 -> sources a/b/c; 4 single-precision elements each */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 244) mflr r12 /* keep our return address in r12; fpdisable moves it back to LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 245) bl fpenable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 246) stfd fr2,32(r1) /* fr2 is used as well; save it in the frame fpenable pushed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 247) li r0,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 248) mtctr r0 /* loop over 4 elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 249) li r7,0 /* r7 = byte offset of the current element (r6 is an input pointer here) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 250) 1: lfsx fr0,r4,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 251) lfsx fr1,r5,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 252) lfsx fr2,r6,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 253) fmadds fr0,fr0,fr2,fr1 /* a * c + b, i.e. (r4 elem) * (r6 elem) + (r5 elem) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 254) stfsx fr0,r3,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 255) addi r7,r7,4 /* advance to the next 4-byte element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 256) bdnz 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 257) lfd fr2,32(r1) /* restore fr2 before the frame is popped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 258) b fpdisable /* fpdisable restores FP state and returns to our caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 259)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 260) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 261) * Vector negative multiply and subtract, floating point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 262) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 263) _GLOBAL(vnmsubfp) /* r3 -> destination, r4/r5/r6 -> sources a/b/c; 4 single-precision elements each */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 264) mflr r12 /* keep our return address in r12; fpdisable moves it back to LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 265) bl fpenable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 266) stfd fr2,32(r1) /* fr2 is used as well; save it in the frame fpenable pushed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 267) li r0,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 268) mtctr r0 /* loop over 4 elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 269) li r7,0 /* r7 = byte offset of the current element (r6 is an input pointer here) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 270) 1: lfsx fr0,r4,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 271) lfsx fr1,r5,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 272) lfsx fr2,r6,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 273) fnmsubs fr0,fr0,fr2,fr1 /* -(a * c - b), i.e. (r5 elem) - (r4 elem) * (r6 elem) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 274) stfsx fr0,r3,r7
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 275) addi r7,r7,4 /* advance to the next 4-byte element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 276) bdnz 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 277) lfd fr2,32(r1) /* restore fr2 before the frame is popped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 278) b fpdisable /* fpdisable restores FP state and returns to our caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 279)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 280) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 281) * Vector reciprocal estimate. We just compute 1.0/x.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 282) * r3 -> destination, r4 -> source.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 283) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 284) _GLOBAL(vrefp) /* 4 single-precision elements are processed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 285) mflr r12 /* keep our return address in r12; fpdisable moves it back to LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 286) bl fpenable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 287) li r0,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 288) LDCONST(fr1, fpone) /* fr1 = 1.0, loop-invariant numerator (the 32-bit LDCONST overwrites r11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 289) mtctr r0 /* loop over 4 elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 290) li r6,0 /* r6 = byte offset of the current element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 291) 1: lfsx fr0,r4,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 292) fdivs fr0,fr1,fr0 /* 1.0 / x */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 293) stfsx fr0,r3,r6
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 294) addi r6,r6,4 /* advance to the next 4-byte element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 295) bdnz 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 296) b fpdisable /* fpdisable restores FP state and returns to our caller */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 297)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 298) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 299) * Vector reciprocal square-root estimate, floating point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 300) * We use the frsqrte instruction for the initial estimate followed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 301) * by 2 iterations of Newton-Raphson to get sufficient accuracy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 302) * r3 -> destination, r4 -> source.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 303) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 304) _GLOBAL(vrsqrtefp) /* 4 single-precision elements are processed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 305) mflr r12 /* keep our return address in r12; fpdisable moves it back to LR */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 306) bl fpenable
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 307) stfd fr2,32(r1) /* fr2-fr5 are used as well; save them in the frame fpenable pushed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 308) stfd fr3,40(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 309) stfd fr4,48(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 310) stfd fr5,56(r1) /* offset 56 is the last 8-byte slot of the 64-byte frame */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 311) li r0,4
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 312) LDCONST(fr4, fpone) /* fr4 = 1.0 (the 32-bit LDCONST overwrites r11) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 313) LDCONST(fr5, fphalf) /* fr5 = 0.5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 314) mtctr r0 /* loop over 4 elements */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 315) li r6,0 /* r6 = byte offset of the current element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 316) 1: lfsx fr0,r4,r6 /* s = source element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 317) frsqrte fr1,fr0 /* r = frsqrte(s) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 318) fmuls fr3,fr1,fr0 /* r * s */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 319) fmuls fr2,fr1,fr5 /* r * 0.5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 320) fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 321) fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 322) fmuls fr3,fr1,fr0 /* r * s */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 323) fmuls fr2,fr1,fr5 /* r * 0.5 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 324) fnmsubs fr3,fr1,fr3,fr4 /* 1 - s * r * r */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 325) fmadds fr1,fr2,fr3,fr1 /* r = r + 0.5 * r * (1 - s * r * r) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 326) stfsx fr1,r3,r6 /* store the refined estimate */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 327) addi r6,r6,4 /* advance to the next 4-byte element */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 328) bdnz 1b
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 329) lfd fr5,56(r1) /* restore fr5-fr2 before the frame is popped */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 330) lfd fr4,48(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 331) lfd fr3,40(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 332) lfd fr2,32(r1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 333) b fpdisable /* fpdisable restores FP state and returns to our caller */