^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1) // SPDX-License-Identifier: GPL-2.0-only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 3) * fpu.c - save/restore of Floating Point Unit Registers on task switch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 4) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 5) * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 6) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 7)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 8) #include <linux/sched.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 9) #include <asm/fpu.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 10)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 11) #ifdef CONFIG_ISA_ARCOMPACT
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 12)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 13) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 14) * To save/restore FPU regs, simplest scheme would use LR/SR insns.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 15) * However since SR serializes the pipeline, an alternate "hack" can be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 16) * which uses the FPU Exchange insn (DEXCL) to r/w FPU regs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 17) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 18) * Store to 64bit dpfp1 reg from a pair of core regs:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 19) * dexcl1 0, r1, r0 ; where r1:r0 is the 64 bit val
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 20) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 21) * Read from dpfp1 into pair of core regs (w/o clobbering dpfp1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 22) * mov_s r3, 0
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 23) * daddh11 r1, r3, r3 ; get "hi" into r1 (dpfp1 unchanged)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 24) * dexcl1 r0, r1, r3 ; get "low" into r0 (dpfp1 low clobbered)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 25) * dexcl1 0, r1, r0 ; restore dpfp1 to orig value
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 26) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 27) * However we can tweak the read, so that read-out of outgoing task's FPU regs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 28) * and write of incoming task's regs happen in one shot. So all the work is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 29) * done before context switch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 30) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 31)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 32) void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 33) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 34) unsigned int *saveto = &prev->thread.fpu.aux_dpfp[0].l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 35) unsigned int *readfrom = &next->thread.fpu.aux_dpfp[0].l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 36)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 37) const unsigned int zero = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 38)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 39) __asm__ __volatile__(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 40) "daddh11 %0, %2, %2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 41) "dexcl1 %1, %3, %4\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 42) : "=&r" (*(saveto + 1)), /* early clobber must here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 43) "=&r" (*(saveto))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 44) : "r" (zero), "r" (*(readfrom + 1)), "r" (*(readfrom))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 45) );
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 46)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 47) __asm__ __volatile__(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 48) "daddh22 %0, %2, %2\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 49) "dexcl2 %1, %3, %4\n"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 50) : "=&r"(*(saveto + 3)), /* early clobber must here */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 51) "=&r"(*(saveto + 2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 52) : "r" (zero), "r" (*(readfrom + 3)), "r" (*(readfrom + 2))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 53) );
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 54) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 55)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 56) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 57)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 58) void fpu_init_task(struct pt_regs *regs)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 59) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 60) const unsigned int fwe = 0x80000000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 61)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 62) /* default rounding mode */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 63) write_aux_reg(ARC_REG_FPU_CTRL, 0x100);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 64)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 65) /* Initialize to zero: setting requires FWE be set */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 66) write_aux_reg(ARC_REG_FPU_STATUS, fwe);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 67) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 68)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 69) void fpu_save_restore(struct task_struct *prev, struct task_struct *next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 70) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 71) struct arc_fpu *save = &prev->thread.fpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 72) struct arc_fpu *restore = &next->thread.fpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 73) const unsigned int fwe = 0x80000000;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 74)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 75) save->ctrl = read_aux_reg(ARC_REG_FPU_CTRL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 76) save->status = read_aux_reg(ARC_REG_FPU_STATUS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 77)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 78) write_aux_reg(ARC_REG_FPU_CTRL, restore->ctrl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 79) write_aux_reg(ARC_REG_FPU_STATUS, (fwe | restore->status));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 80) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 81)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 82) #endif