Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

Below: the x86 page-fault handling code (arch/x86/mm/fault.c in this kernel tree).
// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright (C) 1995  Linus Torvalds
 *  Copyright (C) 2001, 2002 Andi Kleen, SuSE Labs.
 *  Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar
 */
#include <linux/sched.h>		/* test_thread_flag(), ...	*/
#include <linux/sched/task_stack.h>	/* task_stack_*(), ...		*/
#include <linux/kdebug.h>		/* oops_begin/end, ...		*/
#include <linux/extable.h>		/* search_exception_tables	*/
#include <linux/memblock.h>		/* max_low_pfn			*/
#include <linux/kfence.h>		/* kfence_handle_page_fault	*/
#include <linux/kprobes.h>		/* NOKPROBE_SYMBOL, ...		*/
#include <linux/mmiotrace.h>		/* kmmio_handler, ...		*/
#include <linux/perf_event.h>		/* perf_sw_event		*/
#include <linux/hugetlb.h>		/* hstate_index_to_shift	*/
#include <linux/prefetch.h>		/* prefetchw			*/
#include <linux/context_tracking.h>	/* exception_enter(), ...	*/
#include <linux/uaccess.h>		/* faulthandler_disabled()	*/
#include <linux/efi.h>			/* efi_recover_from_page_fault()*/
#include <linux/mm_types.h>

#include <asm/cpufeature.h>		/* boot_cpu_has, ...		*/
#include <asm/traps.h>			/* dotraplinkage, ...		*/
#include <asm/fixmap.h>			/* VSYSCALL_ADDR		*/
#include <asm/vsyscall.h>		/* emulate_vsyscall		*/
#include <asm/vm86.h>			/* struct vm86			*/
#include <asm/mmu_context.h>		/* vma_pkey()			*/
#include <asm/efi.h>			/* efi_recover_from_page_fault()*/
#include <asm/desc.h>			/* store_idt(), ...		*/
#include <asm/cpu_entry_area.h>		/* exception stack		*/
#include <asm/pgtable_areas.h>		/* VMALLOC_START, ...		*/
#include <asm/kvm_para.h>		/* kvm_handle_async_pf		*/

#define CREATE_TRACE_POINTS
#include <asm/trace/exceptions.h>

/*
 * Returns 0 if mmiotrace is disabled, or if the fault is not
 * handled by mmiotrace:
 */
static nokprobe_inline int
kmmio_fault(struct pt_regs *regs, unsigned long addr)
{
	if (unlikely(is_kmmio_active()))
		if (kmmio_handler(regs, addr) == 1)
			return -1;
	return 0;
}

/*
 * Prefetch quirks:
 *
 * 32-bit mode:
 *
 *   Sometimes AMD Athlon/Opteron CPUs report invalid exceptions on prefetch.
 *   Check that here and ignore it.  This is AMD erratum #91.
 *
 * 64-bit mode:
 *
 *   Sometimes the CPU reports invalid exceptions on prefetch.
 *   Check that here and ignore it.
 *
 * Opcode checker based on code by Richard Brunner.
 */
static inline int
check_prefetch_opcode(struct pt_regs *regs, unsigned char *instr,
		      unsigned char opcode, int *prefetch)
{
	unsigned char instr_hi = opcode & 0xf0;
	unsigned char instr_lo = opcode & 0x0f;

	switch (instr_hi) {
	case 0x20:
	case 0x30:
		/*
		 * Values 0x26,0x2E,0x36,0x3E are valid x86 prefixes.
		 * In X86_64 long mode, the CPU will signal invalid
		 * opcode if some of these prefixes are present so
		 * X86_64 will never get here anyway
		 */
		return ((instr_lo & 7) == 0x6);
#ifdef CONFIG_X86_64
	case 0x40:
		/*
		 * In 64-bit mode 0x40..0x4F are valid REX prefixes
		 */
		return (!user_mode(regs) || user_64bit_mode(regs));
#endif
	case 0x60:
		/* 0x64 thru 0x67 are valid prefixes in all modes. */
		return (instr_lo & 0xC) == 0x4;
	case 0xF0:
		/* 0xF0, 0xF2, 0xF3 are valid prefixes in all modes. */
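		/*
		 * instr_lo == 0 is LOCK (0xF0); (instr_lo >> 1) == 1 matches
		 * 0xF2/0xF3 (REPNE/REP).
		 */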
		return !instr_lo || (instr_lo>>1) == 1;
	case 0x00:
		/* Prefetch instruction is 0x0F0D or 0x0F18 */
		if (get_kernel_nofault(opcode, instr))
			return 0;

		*prefetch = (instr_lo == 0xF) &&
			(opcode == 0x0D || opcode == 0x18);
		return 0;
	default:
		return 0;
	}
}

static int
is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
{
	unsigned char *max_instr;
	unsigned char *instr;
	int prefetch = 0;

	/*
	 * If it was an exec (instruction fetch) fault on an NX page, then
	 * do not ignore the fault:
	 */
	if (error_code & X86_PF_INSTR)
		return 0;

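	/* An x86 instruction is at most 15 bytes long; bound the scan accordingly. */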
	instr = (void *)convert_ip_to_linear(current, regs);
	max_instr = instr + 15;

	/*
	 * This code has historically always bailed out if IP points to a
	 * not-present page (e.g. due to a race).  No one has ever
	 * complained about this.
	 */
	pagefault_disable();

	while (instr < max_instr) {
		unsigned char opcode;

		if (user_mode(regs)) {
			if (get_user(opcode, instr))
				break;
		} else {
			if (get_kernel_nofault(opcode, instr))
				break;
		}

		instr++;

		if (!check_prefetch_opcode(regs, instr, opcode, &prefetch))
			break;
	}

	pagefault_enable();
	return prefetch;
}

DEFINE_SPINLOCK(pgd_lock);
LIST_HEAD(pgd_list);

#ifdef CONFIG_X86_32
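/*
 * Copy the kernel's (init_mm) PMD entry covering 'address' into the page
 * table rooted at 'pgd'.  Returns the kernel PMD, or NULL if no kernel
 * mapping exists at that level yet.
 */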
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	p4d_t *p4d, *p4d_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	/*
	 * set_pgd(pgd, *pgd_k); here would be useless on PAE
	 * and redundant with the set_pmd() on non-PAE. As would
	 * set_p4d/set_pud.
	 */
	p4d = p4d_offset(pgd, address);
	p4d_k = p4d_offset(pgd_k, address);
	if (!p4d_present(*p4d_k))
		return NULL;

	pud = pud_offset(p4d, address);
	pud_k = pud_offset(p4d_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);

	if (pmd_present(*pmd) != pmd_present(*pmd_k))
		set_pmd(pmd, *pmd_k);

	if (!pmd_present(*pmd_k))
		return NULL;
	else
		BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k));

	return pmd_k;
}

/*
 *   Handle a fault on the vmalloc or module mapping area
 *
 *   This is needed because there is a race condition between the time
 *   when the vmalloc mapping code updates the PMD to the point in time
 *   where it synchronizes this update with the other page-tables in the
 *   system.
 *
 *   In this race window another thread/CPU can map an area on the same
 *   PMD, finds it already present and does not synchronize it with the
 *   rest of the system yet. As a result v[mz]alloc might return areas
 *   which are not mapped in every page-table in the system, causing an
 *   unhandled page-fault when they are accessed.
 */
static noinline int vmalloc_fault(unsigned long address)
{
	unsigned long pgd_paddr;
	pmd_t *pmd_k;
	pte_t *pte_k;

	/* Make sure we are in vmalloc area: */
	if (!(address >= VMALLOC_START && address < VMALLOC_END))
		return -1;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_paddr = read_cr3_pa();
	pmd_k = vmalloc_sync_one(__va(pgd_paddr), address);
	if (!pmd_k)
		return -1;

	if (pmd_large(*pmd_k))
		return 0;

	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;

	return 0;
}
NOKPROBE_SYMBOL(vmalloc_fault);

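/*
 * Propagate updated kernel mappings to every process page table on
 * pgd_list, one PMD-sized step at a time (32-bit only).
 */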
void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
{
	unsigned long addr;

	for (addr = start & PMD_MASK;
	     addr >= TASK_SIZE_MAX && addr < VMALLOC_END;
	     addr += PMD_SIZE) {
		struct page *page;

		spin_lock(&pgd_lock);
		list_for_each_entry(page, &pgd_list, lru) {
			spinlock_t *pgt_lock;

			/* the pgt_lock only for Xen */
			pgt_lock = &pgd_page_get_mm(page)->page_table_lock;

			spin_lock(pgt_lock);
			vmalloc_sync_one(page_address(page), addr);
			spin_unlock(pgt_lock);
		}
		spin_unlock(&pgd_lock);
	}
}

/*
 * Did it hit the DOS screen memory VA from vm86 mode?
 */
static inline void
check_v8086_mode(struct pt_regs *regs, unsigned long address,
		 struct task_struct *tsk)
{
#ifdef CONFIG_VM86
	unsigned long bit;

	if (!v8086_mode(regs) || !tsk->thread.vm86)
		return;

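	/* 0xA0000 is the start of the legacy VGA/DOS screen memory window. */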
	bit = (address - 0xA0000) >> PAGE_SHIFT;
	if (bit < 32)
		tsk->thread.vm86->screen_bitmap |= 1 << bit;
#endif
}

static bool low_pfn(unsigned long pfn)
{
	return pfn < max_low_pfn;
}

static void dump_pagetable(unsigned long address)
{
	pgd_t *base = __va(read_cr3_pa());
	pgd_t *pgd = &base[pgd_index(address)];
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

#ifdef CONFIG_X86_PAE
	pr_info("*pdpt = %016Lx ", pgd_val(*pgd));
	if (!low_pfn(pgd_val(*pgd) >> PAGE_SHIFT) || !pgd_present(*pgd))
		goto out;
#define pr_pde pr_cont
#else
#define pr_pde pr_info
#endif
	p4d = p4d_offset(pgd, address);
	pud = pud_offset(p4d, address);
	pmd = pmd_offset(pud, address);
	pr_pde("*pde = %0*Lx ", sizeof(*pmd) * 2, (u64)pmd_val(*pmd));
#undef pr_pde

	/*
	 * We must not directly access the pte in the highpte
	 * case if the page table is located in highmem.
	 * And let's rather not kmap-atomic the pte, just in case
	 * it's allocated already:
	 */
	if (!low_pfn(pmd_pfn(*pmd)) || !pmd_present(*pmd) || pmd_large(*pmd))
		goto out;

	pte = pte_offset_kernel(pmd, address);
	pr_cont("*pte = %0*Lx ", sizeof(*pte) * 2, (u64)pte_val(*pte));
out:
	pr_cont("\n");
}

#else /* CONFIG_X86_64: */

#ifdef CONFIG_CPU_SUP_AMD
static const char errata93_warning[] =
KERN_ERR
"******* Your BIOS seems to not contain a fix for K8 errata #93\n"
"******* Working around it, but it may cause SEGVs or burn power.\n"
"******* Please consider a BIOS update.\n"
"******* Disabling USB legacy in the BIOS may also help.\n";
#endif

/*
 * No vm86 mode in 64-bit mode:
 */
static inline void
check_v8086_mode(struct pt_regs *regs, unsigned long address,
		 struct task_struct *tsk)
{
}

static int bad_address(void *p)
{
	unsigned long dummy;

	return get_kernel_nofault(dummy, (unsigned long *)p);
}

static void dump_pagetable(unsigned long address)
{
	pgd_t *base = __va(read_cr3_pa());
	pgd_t *pgd = base + pgd_index(address);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	if (bad_address(pgd))
		goto bad;

	pr_info("PGD %lx ", pgd_val(*pgd));

	if (!pgd_present(*pgd))
		goto out;

	p4d = p4d_offset(pgd, address);
	if (bad_address(p4d))
		goto bad;

	pr_cont("P4D %lx ", p4d_val(*p4d));
	if (!p4d_present(*p4d) || p4d_large(*p4d))
		goto out;

	pud = pud_offset(p4d, address);
	if (bad_address(pud))
		goto bad;

	pr_cont("PUD %lx ", pud_val(*pud));
	if (!pud_present(*pud) || pud_large(*pud))
		goto out;

	pmd = pmd_offset(pud, address);
	if (bad_address(pmd))
		goto bad;

	pr_cont("PMD %lx ", pmd_val(*pmd));
	if (!pmd_present(*pmd) || pmd_large(*pmd))
		goto out;

	pte = pte_offset_kernel(pmd, address);
	if (bad_address(pte))
		goto bad;

	pr_cont("PTE %lx", pte_val(*pte));
out:
	pr_cont("\n");
	return;
bad:
	pr_info("BAD\n");
}

#endif /* CONFIG_X86_64 */

/*
 * Workaround for K8 erratum #93 & buggy BIOS.
 *
 * BIOS SMM functions are required to use a specific workaround
 * to avoid corruption of the 64-bit RIP register on C stepping K8.
 *
 * A lot of BIOSes that didn't get tested properly miss this.
 *
 * The OS sees this as a page fault with the upper 32 bits of RIP cleared.
 * Try to work around it here.
 *
 * Note we only handle faults in kernel here.
 * Does nothing on 32-bit.
 */
static int is_errata93(struct pt_regs *regs, unsigned long address)
{
#if defined(CONFIG_X86_64) && defined(CONFIG_CPU_SUP_AMD)
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD
	    || boot_cpu_data.x86 != 0xf)
		return 0;

	if (address != regs->ip)
		return 0;

	if ((address >> 32) != 0)
		return 0;

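	/*
	 * The upper 32 bits of RIP were cleared by the erratum; assume they
	 * were all ones (a kernel address) and check whether the result
	 * lands in kernel text or module space.
	 */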
	address |= 0xffffffffUL << 32;
	if ((address >= (u64)_stext && address <= (u64)_etext) ||
	    (address >= MODULES_VADDR && address <= MODULES_END)) {
		printk_once(errata93_warning);
		regs->ip = address;
		return 1;
	}
#endif
	return 0;
}

/*
 * Work around K8 erratum #100: K8 in compat mode occasionally jumps
 * to illegal addresses >4GB.
 *
 * We catch this in the page fault handler because these addresses
 * are not reachable. Just detect this case and return.  Any code
 * segment in LDT is compatibility mode.
 */
static int is_errata100(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_64
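	/*
	 * Bit 2 of a segment selector is the Table Indicator: when set, the
	 * selector refers to the LDT, and per the comment above any LDT code
	 * segment is compatibility mode.
	 */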
	if ((regs->cs == __USER32_CS || (regs->cs & (1<<2))) && (address >> 32))
		return 1;
#endif
	return 0;
}

/* Pentium F0 0F C7 C8 bug workaround: */
static int is_f00f_bug(struct pt_regs *regs, unsigned long address)
{
#ifdef CONFIG_X86_F00F_BUG
	if (boot_cpu_has_bug(X86_BUG_F00F) && idt_is_f00f_address(address)) {
		handle_invalid_op(regs);
		return 1;
	}
#endif
	return 0;
}

static void show_ldttss(const struct desc_ptr *gdt, const char *name, u16 index)
{
	u32 offset = (index >> 3) * sizeof(struct desc_struct);
	unsigned long addr;
	struct ldttss_desc desc;

	if (index == 0) {
		pr_alert("%s: NULL\n", name);
		return;
	}

	if (offset + sizeof(struct ldttss_desc) >= gdt->size) {
		pr_alert("%s: 0x%hx -- out of bounds\n", name, index);
		return;
	}

	if (copy_from_kernel_nofault(&desc, (void *)(gdt->address + offset),
			      sizeof(struct ldttss_desc))) {
		pr_alert("%s: 0x%hx -- GDT entry is not readable\n",
			 name, index);
		return;
	}

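	/* The descriptor's base address is split across base0/base1/base2(/base3). */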
	addr = desc.base0 | (desc.base1 << 16) | ((unsigned long)desc.base2 << 24);
#ifdef CONFIG_X86_64
	addr |= ((u64)desc.base3 << 32);
#endif
	pr_alert("%s: 0x%hx -- base=0x%lx limit=0x%x\n",
		 name, index, addr, (desc.limit0 | (desc.limit1 << 16)));
}

static void
show_fault_oops(struct pt_regs *regs, unsigned long error_code, unsigned long address)
{
	if (!oops_may_print())
		return;

	if (error_code & X86_PF_INSTR) {
		unsigned int level;
		pgd_t *pgd;
		pte_t *pte;

		pgd = __va(read_cr3_pa());
		pgd += pgd_index(address);

		pte = lookup_address_in_pgd(pgd, address, &level);

		if (pte && pte_present(*pte) && !pte_exec(*pte))
			pr_crit("kernel tried to execute NX-protected page - exploit attempt? (uid: %d)\n",
				from_kuid(&init_user_ns, current_uid()));
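		/*
		 * Page is executable and user-accessible, yet the fetch
		 * faulted from kernel mode: likely the kernel jumped to
		 * user memory and SMEP stopped it.
		 */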
		if (pte && pte_present(*pte) && pte_exec(*pte) &&
				(pgd_flags(*pgd) & _PAGE_USER) &&
				(__read_cr4() & X86_CR4_SMEP))
			pr_crit("unable to execute userspace code (SMEP?) (uid: %d)\n",
				from_kuid(&init_user_ns, current_uid()));
	}

	if (address < PAGE_SIZE && !user_mode(regs))
		pr_alert("BUG: kernel NULL pointer dereference, address: %px\n",
			(void *)address);
	else
		pr_alert("BUG: unable to handle page fault for address: %px\n",
			(void *)address);

	pr_alert("#PF: %s %s in %s mode\n",
		 (error_code & X86_PF_USER)  ? "user" : "supervisor",
		 (error_code & X86_PF_INSTR) ? "instruction fetch" :
		 (error_code & X86_PF_WRITE) ? "write access" :
					       "read access",
			     user_mode(regs) ? "user" : "kernel");
	pr_alert("#PF: error_code(0x%04lx) - %s\n", error_code,
		 !(error_code & X86_PF_PROT) ? "not-present page" :
		 (error_code & X86_PF_RSVD)  ? "reserved bit violation" :
		 (error_code & X86_PF_PK)    ? "protection keys violation" :
					       "permissions violation");

	if (!(error_code & X86_PF_USER) && user_mode(regs)) {
		struct desc_ptr idt, gdt;
		u16 ldtr, tr;

		/*
		 * This can happen for quite a few reasons.  The more obvious
		 * ones are faults accessing the GDT, or LDT.  Perhaps
		 * surprisingly, if the CPU tries to deliver a benign or
		 * contributory exception from user code and gets a page fault
		 * during delivery, the page fault can be delivered as though
		 * it originated directly from user code.  This could happen
		 * due to wrong permissions on the IDT, GDT, LDT, TSS, or
		 * kernel or IST stack.
		 */
		store_idt(&idt);

		/* Usable even on Xen PV -- it's just slow. */
		native_store_gdt(&gdt);

		pr_alert("IDT: 0x%lx (limit=0x%hx) GDT: 0x%lx (limit=0x%hx)\n",
			 idt.address, idt.size, gdt.address, gdt.size);

		store_ldt(ldtr);
		show_ldttss(&gdt, "LDTR", ldtr);

		store_tr(tr);
		show_ldttss(&gdt, "TR", tr);
	}

	dump_pagetable(address);
}

static noinline void
pgtable_bad(struct pt_regs *regs, unsigned long error_code,
	    unsigned long address)
{
	struct task_struct *tsk;
	unsigned long flags;
	int sig;

	flags = oops_begin();
	tsk = current;
	sig = SIGKILL;

	printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
	       tsk->comm, address);
	dump_pagetable(address);

	if (__die("Bad pagetable", regs, error_code))
		sig = 0;

	oops_end(flags, regs, sig);
}

static void set_signal_archinfo(unsigned long address,
				unsigned long error_code)
{
	struct task_struct *tsk = current;

	/*
	 * To avoid leaking information about the kernel page
	 * table layout, pretend that user-mode accesses to
	 * kernel addresses are always protection faults.
	 *
	 * NB: This means that failed vsyscalls with vsyscall=none
	 * will have the PROT bit.  This doesn't leak any
	 * information and does not appear to cause any problems.
	 */
	if (address >= TASK_SIZE_MAX)
		error_code |= X86_PF_PROT;

	tsk->thread.trap_nr = X86_TRAP_PF;
	tsk->thread.error_code = error_code | X86_PF_USER;
	tsk->thread.cr2 = address;
}

static noinline void
no_context(struct pt_regs *regs, unsigned long error_code,
	   unsigned long address, int signal, int si_code)
{
	struct task_struct *tsk = current;
	unsigned long flags;
	int sig;

	if (user_mode(regs)) {
		/*
		 * This is an implicit supervisor-mode access from user
		 * mode.  Bypass all the kernel-mode recovery code and just
		 * OOPS.
		 */
		goto oops;
	}

	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs, X86_TRAP_PF, error_code, address)) {
		/*
		 * Any interrupt that takes a fault gets the fixup. This makes
		 * the below recursive fault logic only apply to faults from
		 * task context.
		 */
		if (in_interrupt())
			return;

		/*
		 * Per the above we're !in_interrupt(), aka. task context.
		 *
		 * In this case we need to make sure we're not recursively
		 * faulting through the emulate_vsyscall() logic.
		 */
		if (current->thread.sig_on_uaccess_err && signal) {
			set_signal_archinfo(address, error_code);

			/* XXX: hwpoison faults will set the wrong code. */
			force_sig_fault(signal, si_code, (void __user *)address);
		}

		/*
		 * Barring that, we can do the fixup and be happy.
		 */
		return;
	}

#ifdef CONFIG_VMAP_STACK
	/*
	 * Stack overflow?  During boot, we can fault near the initial
	 * stack in the direct map, but that's not an overflow -- check
	 * that we're in vmalloc space to avoid this.
	 */
	if (is_vmalloc_addr((void *)address) &&
	    (((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
	     address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
		unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
		/*
		 * We're likely to be running with very little stack space
		 * left.  It's plausible that we'd hit this condition but
		 * double-fault even before we get this far, in which case
		 * we're fine: the double-fault handler will deal with it.
		 *
		 * We don't want to make it all the way into the oops code
		 * and then double-fault, though, because we're likely to
		 * break the console driver and lose most of the stack dump.
		 */
		asm volatile ("movq %[stack], %%rsp\n\t"
			      "call handle_stack_overflow\n\t"
			      "1: jmp 1b"
			      : ASM_CALL_CONSTRAINT
			      : "D" ("kernel stack overflow (page fault)"),
				"S" (regs), "d" (address),
				[stack] "rm" (stack));
		unreachable();
	}
#endif

	/*
	 * 32-bit:
	 *
	 *   Valid to do another page fault here, because if this fault
	 *   had been triggered by is_prefetch, fixup_exception would have
	 *   handled it.
	 *
	 * 64-bit:
	 *
	 *   Hall of shame of CPU/BIOS bugs.
	 */
	if (is_prefetch(regs, error_code, address))
		return;

	if (is_errata93(regs, address))
		return;

	/*
	 * Buggy firmware could access regions which might page fault, try to
	 * recover from such faults.
	 */
	if (IS_ENABLED(CONFIG_EFI))
		efi_recover_from_page_fault(address);

	/* Only not-present faults should be handled by KFENCE. */
	if (!(error_code & X86_PF_PROT) &&
	    kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs))
		return;

oops:
	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice:
	 */
	flags = oops_begin();

	show_fault_oops(regs, error_code, address);

	if (task_stack_end_corrupted(tsk))
		printk(KERN_EMERG "Thread overran stack, or stack corrupted\n");

	sig = SIGKILL;
	if (__die("Oops", regs, error_code))
		sig = 0;

	/* Executive summary in case the body of the oops scrolled away */
	printk(KERN_DEFAULT "CR2: %016lx\n", address);

	oops_end(flags, regs, sig);
}

/*
 * Print out info about fatal segfaults, if the show_unhandled_signals
 * sysctl is set:
 */
static inline void
show_signal_msg(struct pt_regs *regs, unsigned long error_code,
		unsigned long address, struct task_struct *tsk)
{
	const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG;

	if (!unhandled_signal(tsk, SIGSEGV))
		return;

	if (!printk_ratelimit())
		return;

	printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx",
		loglvl, tsk->comm, task_pid_nr(tsk), address,
		(void *)regs->ip, (void *)regs->sp, error_code);

	print_vma_addr(KERN_CONT " in ", regs->ip);

	printk(KERN_CONT "\n");

	show_opcodes(regs, loglvl);
}

^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791)  * The (legacy) vsyscall page is the only page in the kernel portion
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792)  * of the address space that has user-accessible permissions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) static bool is_vsyscall_vaddr(unsigned long vaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) 	return unlikely((vaddr & PAGE_MASK) == VSYSCALL_ADDR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 		       unsigned long address, u32 pkey, int si_code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 	struct task_struct *tsk = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 	/* User mode accesses just cause a SIGSEGV */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 	if (user_mode(regs) && (error_code & X86_PF_USER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 		 * It's possible to have interrupts off here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		local_irq_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 		 * Valid to do another page fault here because this one came
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 		 * from user space:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 		if (is_prefetch(regs, error_code, address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		if (is_errata100(regs, address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 		 * To avoid leaking information about the kernel page table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 		 * layout, pretend that user-mode accesses to kernel addresses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 		 * are always protection faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 		if (address >= TASK_SIZE_MAX)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 			error_code |= X86_PF_PROT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		if (likely(show_unhandled_signals))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 			show_signal_msg(regs, error_code, address, tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 		set_signal_archinfo(address, error_code);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) 		if (si_code == SEGV_PKUERR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) 			force_sig_pkuerr((void __user *)address, pkey);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 			force_sig_fault(SIGSEGV, si_code, (void __user *)address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 		local_irq_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	if (is_f00f_bug(regs, address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 	no_context(regs, error_code, address, SIGSEGV, si_code);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) static noinline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 		     unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 	__bad_area_nosemaphore(regs, error_code, address, 0, SEGV_MAPERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) __bad_area(struct pt_regs *regs, unsigned long error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 	   unsigned long address, u32 pkey, int si_code)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862) 	struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864) 	 * Something tried to access memory that isn't in our memory map..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) 	 * Fix it, but check if it's kernel or user first..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	__bad_area_nosemaphore(regs, error_code, address, pkey, si_code);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) static noinline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 	__bad_area(regs, error_code, address, 0, SEGV_MAPERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) static inline bool bad_area_access_from_pkeys(unsigned long error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 		struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 	/* This code is always called on the current mm */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) 	bool foreign = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	if (!boot_cpu_has(X86_FEATURE_OSPKE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	if (error_code & X86_PF_PK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 	/* this checks permission keys on the VMA: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) 	if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) 				       (error_code & X86_PF_INSTR), foreign))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 		return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) static noinline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) 		      unsigned long address, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	 * This OSPKE check is not strictly necessary at runtime.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 	 * But, doing it this way allows compiler optimizations
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 	 * if pkeys are compiled out.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	if (bad_area_access_from_pkeys(error_code, vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 		 * A protection key fault means that the PKRU value did not allow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 		 * access to some PTE.  Userspace can figure out what PKRU was
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 		 * from the XSAVE state.  This function captures the pkey from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) 		 * the vma and passes it to userspace so userspace can discover
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 		 * which protection key was set on the PTE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) 		 * If we get here, we know that the hardware signaled a X86_PF_PK
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913) 		 * fault and that there was a VMA once we got in the fault
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914) 		 * handler.  It does *not* guarantee that the VMA we find here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915) 		 * was the one that we faulted on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) 		 * 1. T1   : mprotect_key(foo, PAGE_SIZE, pkey=4);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 		 * 2. T1   : set PKRU to deny access to pkey=4, touches page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 		 * 3. T1   : faults...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 		 * 4.    T2: mprotect_key(foo, PAGE_SIZE, pkey=5);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 		 * 5. T1   : enters fault handler, takes mmap_lock, etc...
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 		 * 6. T1   : reaches here, sees vma_pkey(vma)=5, when we really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 		 *	     faulted on a pte with its pkey=4.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 		u32 pkey = vma_pkey(vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 		__bad_area(regs, error_code, address, pkey, SEGV_PKUERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 		__bad_area(regs, error_code, address, 0, SEGV_ACCERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	  vm_fault_t fault)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	/* Kernel mode? Handle exceptions or die: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	if (!(error_code & X86_PF_USER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 		no_context(regs, error_code, address, SIGBUS, BUS_ADRERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 	/* User-space => ok to do another page fault: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	if (is_prefetch(regs, error_code, address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	set_signal_archinfo(address, error_code);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) #ifdef CONFIG_MEMORY_FAILURE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) 	if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) 		struct task_struct *tsk = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 		unsigned lsb = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) 		pr_err(
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 	"MCE: Killing %s:%d due to hardware memory corruption fault at %lx\n",
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) 			tsk->comm, tsk->pid, address);
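		/*
		 * si_addr_lsb tells the signal handler how much memory is
		 * poisoned: the page shift of the mapping, so a huge page
		 * reports its full size.
		 */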
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957) 		if (fault & VM_FAULT_HWPOISON_LARGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958) 			lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959) 		if (fault & VM_FAULT_HWPOISON)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960) 			lsb = PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961) 		force_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965) 	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968) static noinline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969) mm_fault_error(struct pt_regs *regs, unsigned long error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) 	       unsigned long address, vm_fault_t fault)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) {
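	/*
	 * A fatal signal is already pending and the fault happened in
	 * kernel mode: fix it up via the exception tables (or oops)
	 * instead of running the signal/OOM handling below.
	 */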
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	if (fatal_signal_pending(current) && !(error_code & X86_PF_USER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 		no_context(regs, error_code, address, 0, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 	if (fault & VM_FAULT_OOM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 		/* Kernel mode? Handle exceptions or die: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 		if (!(error_code & X86_PF_USER)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 			no_context(regs, error_code, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 				   SIGSEGV, SEGV_MAPERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 		 * We ran out of memory, call the OOM killer, and return to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) 		 * userspace (which will retry the fault, or kill us if we got
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) 		 * oom-killed):
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 		pagefault_out_of_memory();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 		if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 			     VM_FAULT_HWPOISON_LARGE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 			do_sigbus(regs, error_code, address, fault);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) 		else if (fault & VM_FAULT_SIGSEGV)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 			bad_area_nosemaphore(regs, error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 			BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 
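/*
 * Return 1 if the access described by error_code is permitted by the
 * given (possibly huge) page table entry, i.e. the fault was spurious,
 * or 0 if the entry really does forbid the access.
 */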
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) static int spurious_kernel_fault_check(unsigned long error_code, pte_t *pte)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	if ((error_code & X86_PF_WRITE) && !pte_write(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 	if ((error_code & X86_PF_INSTR) && !pte_exec(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 	return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014)  * Handle a spurious fault caused by a stale TLB entry.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016)  * This allows us to lazily refresh the TLB when increasing the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017)  * permissions of a kernel page (RO -> RW or NX -> X).  Doing it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018)  * eagerly is very expensive since that implies doing a full
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)  * cross-processor TLB flush, even if no stale TLB entries exist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020)  * on other processors.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022)  * Spurious faults may only occur if the TLB contains an entry with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023)  * fewer permissions than the page table entry.  Non-present (P = 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024)  * and reserved bit (R = 1) faults are never spurious.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026)  * There are no security implications to leaving a stale TLB when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027)  * increasing the permissions on a page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029)  * Returns non-zero if a spurious fault was handled, zero otherwise.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031)  * See Intel Developer's Manual Vol 3 Section 4.10.4.3, bullet 3
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032)  * (Optional Invalidation).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) static noinline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) spurious_kernel_fault(unsigned long error_code, unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 	pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) 	pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 	pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) 	pte_t *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) 	int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	 * Only writes to RO or instruction fetches from NX may cause
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 	 * spurious faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 	 * These could be from user or supervisor accesses but the TLB
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 	 * is only lazily flushed after a kernel mapping protection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 	 * change, so user accesses are not expected to cause spurious
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	 * faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	if (error_code != (X86_PF_WRITE | X86_PF_PROT) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	    error_code != (X86_PF_INSTR | X86_PF_PROT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 
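	/*
	 * Walk the kernel (init_mm) page tables by hand.  A non-present
	 * entry at any level means the fault cannot be spurious; a huge
	 * mapping is checked directly at that level.
	 */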
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 	pgd = init_mm.pgd + pgd_index(address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 	if (!pgd_present(*pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	p4d = p4d_offset(pgd, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) 	if (!p4d_present(*p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) 	if (p4d_large(*p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 		return spurious_kernel_fault_check(error_code, (pte_t *) p4d);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	pud = pud_offset(p4d, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 	if (!pud_present(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 	if (pud_large(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 		return spurious_kernel_fault_check(error_code, (pte_t *) pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 	pmd = pmd_offset(pud, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	if (!pmd_present(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 	if (pmd_large(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		return spurious_kernel_fault_check(error_code, (pte_t *) pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	pte = pte_offset_kernel(pmd, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	if (!pte_present(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) 	ret = spurious_kernel_fault_check(error_code, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) 	if (!ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 	 * Make sure we have permissions in PMD.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	 * If not, then there's a bug in the page tables:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 	ret = spurious_kernel_fault_check(error_code, (pte_t *) pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 	WARN_ONCE(!ret, "PMD has incorrect permission bits\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) NOKPROBE_SYMBOL(spurious_kernel_fault);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) int show_unhandled_signals = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) static inline int
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) access_error(unsigned long error_code, struct vm_area_struct *vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) 	/* This is only called for the current mm, so: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 	bool foreign = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) 	 * Read or write was blocked by protection keys.  This is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) 	 * always an unconditional error and can never result in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) 	 * a follow-up action to resolve the fault, like a COW.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	if (error_code & X86_PF_PK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	 * Make sure to check the VMA so that we do not perform
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	 * faults just to hit an X86_PF_PK fault as soon as we fill in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	 * page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	if (!arch_vma_access_permitted(vma, (error_code & X86_PF_WRITE),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 				       (error_code & X86_PF_INSTR), foreign))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) 	if (error_code & X86_PF_WRITE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) 		/* write, present and write, not present: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 		if (unlikely(!(vma->vm_flags & VM_WRITE)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 			return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	/* read, present: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	if (unlikely(error_code & X86_PF_PROT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	/* read, not present: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	if (unlikely(!vma_is_accessible(vma)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 		return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) bool fault_in_kernel_space(unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	 * On 64-bit systems, the vsyscall page is at an address above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	 * TASK_SIZE_MAX, but is not considered part of the kernel
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 	 * address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 	if (IS_ENABLED(CONFIG_X86_64) && is_vsyscall_vaddr(address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 		return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 	return address >= TASK_SIZE_MAX;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158)  * Called for all faults where 'address' is part of the kernel address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159)  * space.  Might get called for faults that originate from *code* that
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160)  * ran in userspace or the kernel.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) static void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 		   unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	 * Protection keys exceptions only happen on user pages.  We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	 * have no user pages in the kernel portion of the address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	 * space, so do not expect them here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	WARN_ON_ONCE(hw_error_code & X86_PF_PK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) #ifdef CONFIG_X86_32
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	 * We can fault-in kernel-space virtual memory on-demand. The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 	 * 'reference' page table is init_mm.pgd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	 * NOTE! We MUST NOT take any locks for this case. We may
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) 	 * be in an interrupt or a critical region, and should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 	 * only copy the information from the master page table,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) 	 * nothing more.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	 * Before doing this on-demand faulting, ensure that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 	 * fault is not any of the following:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	 * 1. A fault on a PTE with a reserved bit set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 	 * 2. A fault caused by a user-mode access.  (Do not demand-
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	 *    fault kernel memory due to user-mode accesses).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 	 * 3. A fault caused by a page-level protection violation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	 *    (A demand fault would be on a non-present page which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	 *     would have X86_PF_PROT==0).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 	 * This is only needed to close a race condition on x86-32 in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) 	 * the vmalloc mapping/unmapping code. See the comment above
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) 	 * vmalloc_fault() for details. On x86-64 the race does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	 * exist as the vmalloc mappings don't need to be synchronized
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	 * there.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 		if (vmalloc_fault(address) >= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	/* Was the fault spurious, caused by lazy TLB invalidation? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	if (spurious_kernel_fault(hw_error_code, address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) 	/* kprobes don't want to hook the spurious faults: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	if (kprobe_page_fault(regs, X86_TRAP_PF))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) 	 * Note, despite being a "bad area", there are quite a few
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	 * acceptable reasons to get here, such as erratum fixups
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	 * and handling kernel code that can fault, like get_user().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	 * Don't take the mm semaphore here. If we fixup a prefetch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 	 * fault we could otherwise deadlock:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 	bad_area_nosemaphore(regs, hw_error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) NOKPROBE_SYMBOL(do_kern_addr_fault);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) /* Handle faults in the user portion of the address space */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) static inline
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) void do_user_addr_fault(struct pt_regs *regs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 			unsigned long hw_error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) 			unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) 	struct vm_area_struct *vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) 	struct task_struct *tsk;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	struct mm_struct *mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	vm_fault_t fault;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 	unsigned int flags = FAULT_FLAG_DEFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 	tsk = current;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	mm = tsk->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	/* kprobes don't want to hook the spurious faults: */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	if (unlikely(kprobe_page_fault(regs, X86_TRAP_PF)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 	 * Reserved bits are never expected to be set on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	 * entries in the user portion of the page tables.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	if (unlikely(hw_error_code & X86_PF_RSVD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) 		pgtable_bad(regs, hw_error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 	 * If SMAP is on, check for invalid kernel (supervisor) access to user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) 	 * pages in the user address space.  The odd case here is WRUSS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	 * which, according to the preliminary documentation, does not respect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 	 * SMAP and will have the USER bit set so, in all cases, SMAP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 	 * enforcement appears to be consistent with the USER bit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	if (unlikely(cpu_feature_enabled(X86_FEATURE_SMAP) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 		     !(hw_error_code & X86_PF_USER) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 		     !(regs->flags & X86_EFLAGS_AC))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 		bad_area_nosemaphore(regs, hw_error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	 * If we're in an interrupt, have no user context or are running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	 * in a region with pagefaults disabled, then we must not take the fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 	if (unlikely(faulthandler_disabled() || !mm)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 		bad_area_nosemaphore(regs, hw_error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	 * It's safe to allow irq's after cr2 has been saved and the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	 * vmalloc fault has been handled.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 	 * User-mode registers count as a user access even for any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	 * potential system fault or CPU buglet:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	if (user_mode(regs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 		local_irq_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 		flags |= FAULT_FLAG_USER;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 		if (regs->flags & X86_EFLAGS_IF)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) 			local_irq_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) 	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) 	if (hw_error_code & X86_PF_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) 		flags |= FAULT_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	if (hw_error_code & X86_PF_INSTR)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) 		flags |= FAULT_FLAG_INSTRUCTION;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) #ifdef CONFIG_X86_64
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	 * Faults in the vsyscall page might need emulation.  The
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	 * vsyscall page is at a high address (>PAGE_OFFSET), but is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 	 * considered to be part of the user address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 	 * The vsyscall page does not have a "real" VMA, so do this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	 * emulation before we go searching for VMAs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	 * PKRU never rejects instruction fetches, so we don't need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 	 * to consider the PF_PK bit.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	if (is_vsyscall_vaddr(address)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 		if (emulate_vsyscall(hw_error_code, regs, address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	 * Do not try to do a speculative page fault if the fault was due to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 	 * protection keys since it can't be resolved.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 	 */
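	/*
	 * handle_speculative_fault() comes from the speculative page fault
	 * (SPF) support carried in this tree: it tries to service the fault
	 * without taking mmap_lock and returns VM_FAULT_RETRY when it has
	 * to fall back to the classic, locked path below.
	 */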
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 	if (!(hw_error_code & X86_PF_PK)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 		fault = handle_speculative_fault(mm, address, flags, &vma, regs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 		if (fault != VM_FAULT_RETRY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 			goto done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	 * Kernel-mode access to the user address space should only occur
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 	 * on well-defined single instructions listed in the exception
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	 * tables.  But, an erroneous kernel fault occurring outside one of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 	 * those areas while also holding mmap_lock might deadlock attempting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	 * to validate the fault against the address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	 * Only do the expensive exception table search when we might be at
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	 * risk of a deadlock.  This happens if we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	 * 1. Failed to acquire mmap_lock, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 	 * 2. The access did not originate in userspace.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 	if (unlikely(!mmap_read_trylock(mm))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 		if (!user_mode(regs) && !search_exception_tables(regs->ip)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 			 * Fault from kernel code from which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 			 * we do not expect faults.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 			bad_area_nosemaphore(regs, hw_error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) 			return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) 		mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) 		 * The above mmap_read_trylock() might have succeeded in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350) 		 * which case we'll have missed the might_sleep() from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) 		 * mmap_read_lock():
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) 		might_sleep();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) 	if (!vma || !can_reuse_spf_vma(vma, address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) 		vma = find_vma(mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) 	if (unlikely(!vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359) 		bad_area(regs, hw_error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) 	if (likely(vma->vm_start <= address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) 		goto good_area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) 	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) 		bad_area(regs, hw_error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 	if (unlikely(expand_stack(vma, address))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 		bad_area(regs, hw_error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 	 * Ok, we have a good vm_area for this memory access, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	 * we can handle it..
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) good_area:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) 	if (unlikely(access_error(hw_error_code, vma))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 		bad_area_access_error(regs, hw_error_code, address, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 	 * If for any reason at all we couldn't handle the fault,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	 * make sure we exit gracefully rather than endlessly redo
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 	 * the fault.  Since we never set FAULT_FLAG_RETRY_NOWAIT, if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	 * we get VM_FAULT_RETRY back, the mmap_lock has been unlocked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 	 * Note that handle_userfault() may also release and reacquire mmap_lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 	 * (and not return with VM_FAULT_RETRY), when returning to userland to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	 * repeat the page fault later with a VM_FAULT_NOPAGE retval
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 	 * (potentially after handling any pending signal during the return to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	 * userland). The return to userland is identified whenever
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) 	 * FAULT_FLAG_USER|FAULT_FLAG_KILLABLE are both set in flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 	fault = handle_mm_fault(vma, address, flags, regs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 	/* Quick path to respond to signals */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	if (fault_signal_pending(fault, regs)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 		if (!user_mode(regs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 			no_context(regs, hw_error_code, address, SIGBUS,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 				   BUS_ADRERR);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) 	 * If we need to retry, the mmap_lock has already been released,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	 * and if there is a fatal signal pending there is no guarantee
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 	 * that we made any progress. Handle this case first.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 	if (unlikely((fault & VM_FAULT_RETRY) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) 		     (flags & FAULT_FLAG_ALLOW_RETRY))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 		flags |= FAULT_FLAG_TRIED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) 		 * Do not reuse this vma: the mmap_lock is dropped across the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) 		 * retry, so the vma must be looked up again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) 		vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) 		goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) 	mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) done:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) 	if (unlikely(fault & VM_FAULT_ERROR)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) 		mm_fault_error(regs, hw_error_code, address, fault);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
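	/*
	 * check_v8086_mode() only does anything for vm86 tasks on 32-bit
	 * kernels, where it records faults in the legacy VGA window in the
	 * task's screen_bitmap; everywhere else it is a no-op.
	 */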
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	check_v8086_mode(regs, address, tsk);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) NOKPROBE_SYMBOL(do_user_addr_fault);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) static __always_inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) trace_page_fault_entries(struct pt_regs *regs, unsigned long error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 			 unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 	if (!trace_pagefault_enabled())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 	if (user_mode(regs))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 		trace_page_fault_user(address, regs, error_code);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) 		trace_page_fault_kernel(address, regs, error_code);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) static __always_inline void
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) handle_page_fault(struct pt_regs *regs, unsigned long error_code,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 			      unsigned long address)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 	trace_page_fault_entries(regs, error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) 	if (unlikely(kmmio_fault(regs, address)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) 	/* Was the fault on kernel-controlled part of the address space? */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	if (unlikely(fault_in_kernel_space(address))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) 		do_kern_addr_fault(regs, error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 		do_user_addr_fault(regs, error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 		 * User address page fault handling might have reenabled
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) 		 * interrupts. Fixing up all potential exit points of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) 		 * do_user_addr_fault() and its leaf functions is just not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) 		 * doable w/o creating an unholy mess or turning the code
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) 		 * upside down.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) 		local_irq_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) 	unsigned long address = read_cr2();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) 	irqentry_state_t state;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 
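	/*
	 * The fault handling below will most likely take mmap_lock;
	 * start pulling its cache line in exclusive state now.
	 */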
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	prefetchw(&current->mm->mmap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) 	 * KVM uses #PF vector to deliver 'page not present' events to guests
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) 	 * (asynchronous page fault mechanism). The event happens when a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 	 * userspace task is trying to access some valid (from guest's point of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) 	 * view) memory which is not currently mapped by the host (e.g. the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 	 * memory is swapped out). Note, the corresponding "page ready" event
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) 	 * which is injected when the memory becomes available, is delivered via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) 	 * an interrupt mechanism and not a #PF exception
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	 * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 	 * We are relying on the interrupted context being sane (valid RSP,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 	 * relevant locks not held, etc.), which is fine as long as the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	 * interrupted context had IF=1.  We are also relying on the KVM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	 * async pf type field and CR2 being read consistently instead of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	 * getting values from real and async page faults mixed up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	 * Fingers crossed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 	 * The async #PF handling code takes care of idtentry handling
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) 	 * itself.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	if (kvm_handle_async_pf(regs, (u32)address))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	 * Entry handling for valid #PF from kernel mode is slightly
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 	 * different: RCU is already watching and rcu_irq_enter() must not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) 	 * be invoked because a kernel fault on a user space address might
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 	 * sleep.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) 	 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) 	 * In case the fault hit an RCU idle region, the conditional entry
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) 	 * code reenables RCU to avoid subsequent wreckage, which helps
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 	 * debuggability.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	state = irqentry_enter(regs);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	instrumentation_begin();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 	handle_page_fault(regs, error_code, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	instrumentation_end();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 	irqentry_exit(regs, state);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) }