/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/jump_label.h>
#include <asm/unwind_hints.h>
#include <asm/cpufeatures.h>
#include <asm/page_types.h>
#include <asm/percpu.h>
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>

/*

 x86 function call convention, 64-bit:
 -------------------------------------
  arguments           |  callee-saved      | extra caller-saved | return
 [callee-clobbered]   |                    | [callee-clobbered] |
 ---------------------------------------------------------------------------
 rdi rsi rdx rcx r8-9 | rbx rbp [*] r12-15 | r10-11             | rax, rdx [**]

 ( rsp is obviously invariant across normal function calls. (gcc can 'merge'
   functions when it sees tail-call optimization possibilities.) rflags is
   clobbered. Leftover arguments are passed on the stack. )

 [*] In the frame-pointers case rbp is fixed to the stack frame.

 [**] for struct return values wider than 64 bits the return convention is a
      bit more complex: up to 128 bits width we return small structures
      straight in rax, rdx. For structures larger than that (3 words or
      larger) the caller puts a pointer to an on-stack return struct
      [allocated in the caller's stack frame] into the first argument - i.e.
      into rdi. All other arguments shift up by one in this case.
      Fortunately this case is rare in the kernel.

 For 32-bit we have the following conventions - the kernel is built with
 -mregparm=3 and -freg-struct-return:

 x86 function calling convention, 32-bit:
 ----------------------------------------
  arguments         | callee-saved        | extra caller-saved | return
 [callee-clobbered] |                     | [callee-clobbered] |
 -------------------------------------------------------------------------
 eax edx ecx        | ebx edi esi ebp [*] | <none>             | eax, edx [**]

 ( here too esp is obviously invariant across normal function calls. eflags
   is clobbered. Leftover arguments are passed on the stack. )

 [*] In the frame-pointers case ebp is fixed to the stack frame.

 [**] We build with -freg-struct-return, which on 32-bit means similar
      semantics as on 64-bit: edx can be used for a second return value
      (i.e. covering integer and structure sizes up to 64 bits) - after that
      it gets more complex and more expensive: 3-word or larger struct returns
      get done in the caller's frame and the pointer to the return struct goes
      into regparm0, i.e. eax - the other arguments shift up and the
      function's register parameters degenerate to regparm=2 in essence.

*/
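
/*
 * Illustrative example (hypothetical prototype, not part of this header):
 * under the 64-bit convention above, a call to
 *
 *	long copy_thing(void *dst, const void *src, unsigned long len);
 *
 * receives dst in %rdi, src in %rsi and len in %rdx, may clobber %rcx,
 * %r8-%r11 and rflags freely, must preserve %rbx, %rbp and %r12-%r15,
 * and returns its result in %rax. A seventh argument would go on the
 * stack.
 */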

#ifdef CONFIG_X86_64

/*
 * 64-bit system call stack frame layout defines and helpers,
 * for assembly code:
 */

/* The layout forms the "struct pt_regs" on the stack: */
/*
 * C ABI says these regs are callee-preserved. They aren't saved on kernel
 * entry unless the syscall needs a complete, fully filled "struct pt_regs".
 */
#define R15		0*8
#define R14		1*8
#define R13		2*8
#define R12		3*8
#define RBP		4*8
#define RBX		5*8
/* These regs are callee-clobbered. Always saved on kernel entry. */
#define R11		6*8
#define R10		7*8
#define R9		8*8
#define R8		9*8
#define RAX		10*8
#define RCX		11*8
#define RDX		12*8
#define RSI		13*8
#define RDI		14*8
/*
 * On syscall entry, this is the syscall number. On CPU exception, this is
 * the error code. On hw interrupt, it's the IRQ number:
 */
#define ORIG_RAX	15*8
/* Return frame for iretq */
#define RIP		16*8
#define CS		17*8
#define EFLAGS		18*8
#define RSP		19*8
#define SS		20*8

#define SIZEOF_PTREGS	21*8
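
/*
 * Illustrative sketch (hypothetical snippet, not used by this header):
 * once the full frame is in place, any saved register can be addressed
 * relative to %rsp with the offsets above, and the whole frame can be
 * dropped in one add:
 *
 *	movq	RDI(%rsp), %rdi		# reload saved pt_regs->di
 *	movq	$0, RAX(%rsp)		# patch the saved return value
 *	addq	$SIZEOF_PTREGS, %rsp	# discard the pt_regs frame
 */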

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
	.if \save_ret
	pushq	%rsi		/* pt_regs->si */
	movq	8(%rsp), %rsi	/* temporarily store the return address in %rsi */
	movq	%rdi, 8(%rsp)	/* pt_regs->di (overwriting original return address) */
	.else
	pushq	%rdi		/* pt_regs->di */
	pushq	%rsi		/* pt_regs->si */
	.endif
	pushq	\rdx		/* pt_regs->dx */
	pushq	%rcx		/* pt_regs->cx */
	pushq	\rax		/* pt_regs->ax */
	pushq	%r8		/* pt_regs->r8 */
	pushq	%r9		/* pt_regs->r9 */
	pushq	%r10		/* pt_regs->r10 */
	pushq	%r11		/* pt_regs->r11 */
	pushq	%rbx		/* pt_regs->bx */
	pushq	%rbp		/* pt_regs->bp */
	pushq	%r12		/* pt_regs->r12 */
	pushq	%r13		/* pt_regs->r13 */
	pushq	%r14		/* pt_regs->r14 */
	pushq	%r15		/* pt_regs->r15 */
	UNWIND_HINT_REGS

	.if \save_ret
	pushq	%rsi		/* return address on top of stack */
	.endif

	/*
	 * Sanitize registers of values that a speculation attack might
	 * otherwise want to exploit. The lower registers are likely clobbered
	 * well before they could be put to use in a speculative execution
	 * gadget.
	 */
	xorl	%edx,  %edx	/* nospec dx  */
	xorl	%ecx,  %ecx	/* nospec cx  */
	xorl	%r8d,  %r8d	/* nospec r8  */
	xorl	%r9d,  %r9d	/* nospec r9  */
	xorl	%r10d, %r10d	/* nospec r10 */
	xorl	%r11d, %r11d	/* nospec r11 */
	xorl	%ebx,  %ebx	/* nospec rbx */
	xorl	%ebp,  %ebp	/* nospec rbp */
	xorl	%r12d, %r12d	/* nospec r12 */
	xorl	%r13d, %r13d	/* nospec r13 */
	xorl	%r14d, %r14d	/* nospec r14 */
	xorl	%r15d, %r15d	/* nospec r15 */

.endm
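
/*
 * Stack sketch for the save_ret=1 case (illustrative): the macro is
 * reached via a call, so a return address sits on top of the stack.
 * The first pushq stores %rsi in the slot that becomes pt_regs->si,
 * the return address is then parked in %rsi so that %rdi can take over
 * the old return-address slot as pt_regs->di, and after the remaining
 * pushes the return address is pushed back on top:
 *
 *	return address			<- %rsp after the macro
 *	pt_regs (r15 lowest ... ss highest)
 */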

.macro POP_REGS pop_rdi=1 skip_r11rcx=0
	popq	%r15
	popq	%r14
	popq	%r13
	popq	%r12
	popq	%rbp
	popq	%rbx
	.if \skip_r11rcx
	popq	%rsi		/* discard the saved r11 slot */
	.else
	popq	%r11
	.endif
	popq	%r10
	popq	%r9
	popq	%r8
	popq	%rax
	.if \skip_r11rcx
	popq	%rsi		/* discard the saved cx slot */
	.else
	popq	%rcx
	.endif
	popq	%rdx
	popq	%rsi
	.if \pop_rdi
	popq	%rdi
	.endif
.endm
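
/*
 * Pairing sketch (illustrative; the handler name is hypothetical):
 *
 *	PUSH_AND_CLEAR_REGS
 *	movq	%rsp, %rdi		# arg0: struct pt_regs *
 *	call	do_handler		# hypothetical C handler
 *	POP_REGS
 *	iretq
 *
 * The skip_r11rcx=1 form suits a sysret exit path, which restores %rcx
 * and %r11 from the saved RIP and RFLAGS because sysret consumes them:
 * the stale r11 and cx slots are popped into %rsi purely to discard
 * them, and %rsi itself is restored from its own slot two pops later.
 */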

#ifdef CONFIG_PAGE_TABLE_ISOLATION

/*
 * PAGE_TABLE_ISOLATION PGDs are 8k. Flip bit 12 to switch between the two
 * halves:
 */
#define PTI_USER_PGTABLE_BIT		PAGE_SHIFT
#define PTI_USER_PGTABLE_MASK		(1 << PTI_USER_PGTABLE_BIT)
#define PTI_USER_PCID_BIT		X86_CR3_PTI_PCID_USER_BIT
#define PTI_USER_PCID_MASK		(1 << PTI_USER_PCID_BIT)
#define PTI_USER_PGTABLE_AND_PCID_MASK	(PTI_USER_PCID_MASK | PTI_USER_PGTABLE_MASK)
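
/*
 * Worked example (assuming the usual PAGE_SHIFT of 12): the kernel and
 * user PGDs are the two 4k halves of one 8k allocation, so
 * PTI_USER_PGTABLE_MASK is 1 << 12 = 0x1000 and setting or clearing
 * that single CR3 bit switches pagetables without a separate pointer.
 * PTI_USER_PCID_MASK likewise selects the user ASID within the low
 * PCID bits of CR3, so a full user switch is plain bit arithmetic on
 * one register.
 */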

.macro SET_NOFLUSH_BIT reg:req
	bts	$X86_CR3_PCID_NOFLUSH_BIT, \reg
.endm

.macro ADJUST_KERNEL_CR3 reg:req
	ALTERNATIVE "", "SET_NOFLUSH_BIT \reg", X86_FEATURE_PCID
	/* Clear PCID and the PAGE_TABLE_ISOLATION bit, point CR3 at kernel pagetables: */
	andq	$(~PTI_USER_PGTABLE_AND_PCID_MASK), \reg
.endm

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	mov	%cr3, \scratch_reg
	ADJUST_KERNEL_CR3 \scratch_reg
	mov	\scratch_reg, %cr3
.Lend_\@:
.endm
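
/*
 * Usage sketch (illustrative; register choice hypothetical and assumed
 * already saved): entry code arriving from user space switches to the
 * kernel pagetables before touching kernel data:
 *
 *	swapgs
 *	SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi
 *
 * With PTI compiled in but disabled at boot, the first ALTERNATIVE
 * above patches in a jump straight to .Lend_\@, making the macro a NOP.
 */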

#define THIS_CPU_user_pcid_flush_mask	\
	PER_CPU_VAR(cpu_tlbstate) + TLB_STATE_user_pcid_flush_mask

.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
	mov	%cr3, \scratch_reg

	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

	/*
	 * Test if the ASID needs a flush.
	 */
	movq	\scratch_reg, \scratch_reg2
	andq	$(0x7FF), \scratch_reg		/* mask ASID */
	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	jnc	.Lnoflush_\@

	/* Flush needed, clear the bit */
	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	movq	\scratch_reg2, \scratch_reg
	jmp	.Lwrcr3_pcid_\@

.Lnoflush_\@:
	movq	\scratch_reg2, \scratch_reg
	SET_NOFLUSH_BIT \scratch_reg

.Lwrcr3_pcid_\@:
	/* Flip the ASID to the user version */
	orq	$(PTI_USER_PCID_MASK), \scratch_reg

.Lwrcr3_\@:
	/* Flip the PGD to the user version */
	orq	$(PTI_USER_PGTABLE_MASK), \scratch_reg
	mov	\scratch_reg, %cr3
.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
	pushq	%rax
	SWITCH_TO_USER_CR3_NOSTACK scratch_reg=\scratch_reg scratch_reg2=%rax
	popq	%rax
.endm
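
/*
 * Usage sketch (illustrative): the return-to-user path performs the
 * inverse switch as late as possible, just before leaving the kernel:
 *
 *	SWITCH_TO_USER_CR3_STACK scratch_reg=%rdi
 *	swapgs
 *	iretq
 *
 * The _NOSTACK variant is for paths that cannot push to the current
 * stack and instead have a second free register for scratch_reg2.
 */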

.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
	ALTERNATIVE "jmp .Ldone_\@", "", X86_FEATURE_PTI
	movq	%cr3, \scratch_reg
	movq	\scratch_reg, \save_reg
	/*
	 * Test the user pagetable bit. If set, then the user page tables
	 * are active. If clear, CR3 already points at the kernel page
	 * tables.
	 */
	bt	$PTI_USER_PGTABLE_BIT, \scratch_reg
	jnc	.Ldone_\@

	ADJUST_KERNEL_CR3 \scratch_reg
	movq	\scratch_reg, %cr3

.Ldone_\@:
.endm

.macro RESTORE_CR3 scratch_reg:req save_reg:req
	ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI

	ALTERNATIVE "jmp .Lwrcr3_\@", "", X86_FEATURE_PCID

	/*
	 * KERNEL pages can always resume with NOFLUSH as we do
	 * explicit flushes.
	 */
	bt	$PTI_USER_PGTABLE_BIT, \save_reg
	jnc	.Lnoflush_\@

	/*
	 * Check if there's a pending flush for the user ASID we're
	 * about to set.
	 */
	movq	\save_reg, \scratch_reg
	andq	$(0x7FF), \scratch_reg
	bt	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	jnc	.Lnoflush_\@

	btr	\scratch_reg, THIS_CPU_user_pcid_flush_mask
	jmp	.Lwrcr3_\@

.Lnoflush_\@:
	SET_NOFLUSH_BIT \save_reg

.Lwrcr3_\@:
	/*
	 * The CR3 write could be avoided when not changing its value,
	 * but would require a CR3 read *and* a scratch register.
	 */
	movq	\save_reg, %cr3
.Lend_\@:
.endm
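
/*
 * Pairing sketch (illustrative): "paranoid" entries such as the NMI
 * path may interrupt the kernel with either CR3 value live, so they
 * save the incoming CR3, force the kernel one, and put the original
 * back on exit; a callee-saved register survives any intervening C
 * call:
 *
 *	SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg=%rax save_reg=%r14
 *	...				# handler body
 *	RESTORE_CR3 scratch_reg=%rax save_reg=%r14
 */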

#else /* CONFIG_PAGE_TABLE_ISOLATION=n: */

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
.endm
.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
.endm
.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
.endm
.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
.endm
.macro RESTORE_CR3 scratch_reg:req save_reg:req
.endm

#endif /* CONFIG_PAGE_TABLE_ISOLATION */

/*
 * Mitigate Spectre v1 for conditional swapgs code paths.
 *
 * FENCE_SWAPGS_USER_ENTRY is used in the user entry swapgs code path, to
 * prevent a speculative swapgs when coming from kernel space.
 *
 * FENCE_SWAPGS_KERNEL_ENTRY is used in the kernel entry non-swapgs code path,
 * to prevent the swapgs from getting speculatively skipped when coming from
 * user space.
 */
.macro FENCE_SWAPGS_USER_ENTRY
	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_USER
.endm
.macro FENCE_SWAPGS_KERNEL_ENTRY
	ALTERNATIVE "", "lfence", X86_FEATURE_FENCE_SWAPGS_KERNEL
.endm
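
/*
 * Usage sketch (illustrative): a conditional-swapgs entry point places
 * the matching fence on each side of the branch:
 *
 *	testb	$3, CS(%rsp)		# low CS bits != 0: from user space
 *	jz	.Lfrom_kernel
 *	swapgs
 *	FENCE_SWAPGS_USER_ENTRY		# serialize after the swapgs
 *	jmp	.Lcommon
 * .Lfrom_kernel:
 *	FENCE_SWAPGS_KERNEL_ENTRY	# keep the skipped swapgs from
 *					# executing speculatively
 * .Lcommon:
 */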

.macro STACKLEAK_ERASE_NOCLOBBER
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
	PUSH_AND_CLEAR_REGS
	call	stackleak_erase
	POP_REGS
#endif
.endm

.macro SAVE_AND_SET_GSBASE scratch_reg:req save_reg:req
	rdgsbase \save_reg
	GET_PERCPU_BASE \scratch_reg
	wrgsbase \scratch_reg
.endm
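
/*
 * Usage sketch (illustrative, FSGSBASE-capable entry paths): stash the
 * interrupted GSBASE and install this CPU's per-CPU base without a
 * swapgs, restoring the saved value on the way out; %rbx survives any
 * intervening C call because it is callee-saved:
 *
 *	SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
 *	...				# handler body
 *	wrgsbase %rbx			# put the original GSBASE back
 */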

#else /* CONFIG_X86_64 */
# undef UNWIND_HINT_IRET_REGS
# define UNWIND_HINT_IRET_REGS
#endif /* !CONFIG_X86_64 */

.macro STACKLEAK_ERASE
#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
	call	stackleak_erase
#endif
.endm

#ifdef CONFIG_SMP

/*
 * CPU/node NR is loaded from the limit (size) field of a special segment
 * descriptor entry in the GDT.
 */
.macro LOAD_CPU_AND_NODE_SEG_LIMIT reg:req
	movq	$__CPUNODE_SEG, \reg
#ifdef __clang__
	/*
	 * Hand-assembled opcode bytes for "lsl %rax, %rax" (48 0f 03 c0),
	 * apparently working around an assembler limitation; note that
	 * this encoding assumes \reg is %rax.
	 */
	.long	0xc0030f48
#else
	lsl	\reg, \reg
#endif
.endm
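
/*
 * Worked example (illustrative): the GDT descriptor at __CPUNODE_SEG
 * encodes (node << VDSO_CPUNODE_BITS) | cpu in its segment limit, so on
 * CPU 2 of node 0 the lsl above yields 2, and masking with
 * VDSO_CPUNODE_MASK in GET_PERCPU_BASE below extracts the CPU number
 * used to index __per_cpu_offset.
 */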

/*
 * Fetch the per-CPU GSBASE value for this processor and put it in @reg.
 * We normally use %gs for accessing per-CPU data, but we are setting up
 * %gs here and obviously cannot use %gs itself to access per-CPU data.
 *
 * Do not use RDPID, because KVM loads the guest's TSC_AUX on vm-entry and
 * may not restore the host's value until the CPU returns to userspace.
 * Thus the kernel would consume a guest's TSC_AUX if an NMI arrives
 * while running KVM's run loop.
 */
.macro GET_PERCPU_BASE reg:req
	LOAD_CPU_AND_NODE_SEG_LIMIT \reg
	andq	$VDSO_CPUNODE_MASK, \reg
	movq	__per_cpu_offset(, \reg, 8), \reg
.endm

#else

.macro GET_PERCPU_BASE reg:req
	movq	pcpu_unit_offsets(%rip), \reg
.endm

#endif /* CONFIG_SMP */