Orange Pi5 kernel

Deprecated Linux kernel 5.10.110 for OrangePi 5/5B/5+ boards

// SPDX-License-Identifier: GPL-2.0

/*
 * Xen mmu operations
 *
 * This file contains the various mmu fetch and update operations.
 * The most important job they must perform is the mapping between the
 * domain's pfn and the overall machine mfns.
 *
 * Xen allows guests to directly update the pagetable, in a controlled
 * fashion.  In other words, the guest modifies the same pagetable
 * that the CPU actually uses, which eliminates the overhead of having
 * a separate shadow pagetable.
 *
 * In order to allow this, it falls on the guest domain to map its
 * notion of a "physical" pfn - which is just a domain-local linear
 * address - into a real "machine address" which the CPU's MMU can
 * use.
 *
 * A pgd_t/pmd_t/pte_t will typically contain an mfn, and so can be
 * inserted directly into the pagetable.  When creating a new
 * pte/pmd/pgd, it converts the passed pfn into an mfn.  Conversely,
 * when reading the content back with __(pgd|pmd|pte)_val, it converts
 * the mfn back into a pfn.
 *
 * The other constraint is that all pages which make up a pagetable
 * must be mapped read-only in the guest.  This prevents uncontrolled
 * guest updates to the pagetable.  Xen strictly enforces this, and
 * will disallow any pagetable update which will end up mapping a
 * pagetable page RW, and will disallow using any writable page as a
 * pagetable.
 *
 * Naively, when loading %cr3 with the base of a new pagetable, Xen
 * would need to validate the whole pagetable before going on.
 * Naturally, this is quite slow.  The solution is to "pin" a
 * pagetable, which enforces all the constraints on the pagetable even
 * when it is not actively in use.  This means that Xen can be assured
 * that it is still valid when you do load it into %cr3, and doesn't
 * need to revalidate it.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
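
/*
 * Illustrative note, not part of the original file: as described above, a
 * guest "physical" frame number (pfn) is translated to a machine frame
 * number (mfn) before it is written into a pagetable entry, and translated
 * back when the entry is read, e.g.
 *
 *	mfn = pfn_to_mfn(pfn);		// building an entry: guest frame -> machine frame
 *	pfn = mfn_to_pfn(mfn);		// reading it back:   machine frame -> guest frame
 *
 * pte_pfn_to_mfn() and pte_mfn_to_pfn() further down apply exactly this
 * translation to the frame-number bits of pte/pmd/pud/pgd values.
 */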
#include <linux/sched/mm.h>
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/bug.h>
#include <linux/vmalloc.h>
#include <linux/export.h>
#include <linux/init.h>
#include <linux/gfp.h>
#include <linux/memblock.h>
#include <linux/seq_file.h>
#include <linux/crash_dump.h>
#include <linux/pgtable.h>
#ifdef CONFIG_KEXEC_CORE
#include <linux/kexec.h>
#endif

#include <trace/events/xen.h>

#include <asm/tlbflush.h>
#include <asm/fixmap.h>
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/paravirt.h>
#include <asm/e820/api.h>
#include <asm/linkage.h>
#include <asm/page.h>
#include <asm/init.h>
#include <asm/memtype.h>
#include <asm/smp.h>
#include <asm/tlb.h>

#include <asm/xen/hypercall.h>
#include <asm/xen/hypervisor.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/interface/xen.h>
#include <xen/interface/hvm/hvm_op.h>
#include <xen/interface/version.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>

#include "multicalls.h"
#include "mmu.h"
#include "debugfs.h"

/* l3 pud for userspace vsyscall mapping */
static pud_t level3_user_vsyscall[PTRS_PER_PUD] __page_aligned_bss;

/*
 * Protects atomic reservation decrease/increase against concurrent increases.
 * Also protects non-atomic updates of current_pages and balloon lists.
 */
static DEFINE_SPINLOCK(xen_reservation_lock);

/*
 * Note about cr3 (pagetable base) values:
 *
 * xen_cr3 contains the current logical cr3 value; it contains the
 * last set cr3.  This may not be the current effective cr3, because
 * its update may be being lazily deferred.  However, a vcpu looking
 * at its own cr3 can use this value knowing that everything will
 * be self-consistent.
 *
 * xen_current_cr3 contains the actual vcpu cr3; it is set once the
 * hypercall to set the vcpu cr3 is complete (so it may be a little
 * out of date, but it will never be set early).  If one vcpu is
 * looking at another vcpu's cr3 value, it should use this variable.
 */
DEFINE_PER_CPU(unsigned long, xen_cr3);	 /* cr3 stored as physaddr */
DEFINE_PER_CPU(unsigned long, xen_current_cr3);	 /* actual vcpu cr3 */
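
/*
 * Illustrative sketch, not part of the original file: following the note
 * above, a vcpu inspecting its own pagetable base can read xen_cr3, while
 * code looking at a different vcpu should read xen_current_cr3, which is
 * only updated once the set-cr3 hypercall has completed.  The helper names
 * are hypothetical.
 */
static inline unsigned long example_own_cr3(void)
{
	return this_cpu_read(xen_cr3);
}

static inline unsigned long example_other_vcpu_cr3(int cpu)
{
	return per_cpu(xen_current_cr3, cpu);
}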

static phys_addr_t xen_pt_base, xen_pt_size __initdata;

static DEFINE_STATIC_KEY_FALSE(xen_struct_pages_ready);

/*
 * Just beyond the highest usermode address.  STACK_TOP_MAX has a
 * redzone above it, so round it up to a PGD boundary.
 */
#define USER_LIMIT	((STACK_TOP_MAX + PGDIR_SIZE - 1) & PGDIR_MASK)

void make_lowmem_page_readonly(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;		/* vaddr missing */

	ptev = pte_wrprotect(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}

void make_lowmem_page_readwrite(void *vaddr)
{
	pte_t *pte, ptev;
	unsigned long address = (unsigned long)vaddr;
	unsigned int level;

	pte = lookup_address(address, &level);
	if (pte == NULL)
		return;		/* vaddr missing */

	ptev = pte_mkwrite(*pte);

	if (HYPERVISOR_update_va_mapping(address, ptev, 0))
		BUG();
}
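
/*
 * Illustrative sketch, not part of the original file: how a caller might
 * use the two helpers above around a lowmem page.  The function name is
 * hypothetical; it only exists to show the RO/RW toggle.
 */
static void __maybe_unused example_toggle_protection(void)
{
	unsigned long page = get_zeroed_page(GFP_KERNEL);

	if (!page)
		return;

	make_lowmem_page_readonly((void *)page);
	/* the mapping is now RO; a direct write here would fault */
	make_lowmem_page_readwrite((void *)page);
	free_page(page);
}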


/*
 * During early boot all page table pages are pinned, but we do not have struct
 * pages, so return true until struct pages are ready.
 */
static bool xen_page_pinned(void *ptr)
{
	if (static_branch_likely(&xen_struct_pages_ready)) {
		struct page *page = virt_to_page(ptr);

		return PagePinned(page);
	}
	return true;
}

static void xen_extend_mmu_update(const struct mmu_update *update)
{
	struct multicall_space mcs;
	struct mmu_update *u;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmu_update, sizeof(*u));

	if (mcs.mc != NULL) {
		mcs.mc->args[1]++;
	} else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmu_update(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *update;
}

static void xen_extend_mmuext_op(const struct mmuext_op *op)
{
	struct multicall_space mcs;
	struct mmuext_op *u;

	mcs = xen_mc_extend_args(__HYPERVISOR_mmuext_op, sizeof(*u));

	if (mcs.mc != NULL) {
		mcs.mc->args[1]++;
	} else {
		mcs = __xen_mc_entry(sizeof(*u));
		MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
	}

	u = mcs.args;
	*u = *op;
}

static void xen_set_pmd_hyper(pmd_t *ptr, pmd_t val)
{
	struct mmu_update u;

	preempt_disable();

	xen_mc_batch();

	/* ptr may be ioremapped for 64-bit pagetable setup */
	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
	u.val = pmd_val_ma(val);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_pmd(pmd_t *ptr, pmd_t val)
{
	trace_xen_mmu_set_pmd(ptr, val);

	/* If page is not pinned, we can just update the entry
	   directly */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		return;
	}

	xen_set_pmd_hyper(ptr, val);
}

/*
 * Associate a virtual page frame with a given physical page frame
 * and protection flags for that frame.
 */
void set_pte_mfn(unsigned long vaddr, unsigned long mfn, pgprot_t flags)
{
	set_pte_vaddr(vaddr, mfn_pte(mfn, flags));
}

static bool xen_batched_set_pte(pte_t *ptep, pte_t pteval)
{
	struct mmu_update u;

	if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU)
		return false;

	xen_mc_batch();

	u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
	u.val = pte_val_ma(pteval);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	return true;
}
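
/*
 * Illustrative note, not part of the original file: the batched path above
 * is taken when the caller has entered lazy MMU mode, e.g.
 *
 *	arch_enter_lazy_mmu_mode();
 *	for (...)
 *		set_pte_at(mm, addr, ptep, pte);
 *	arch_leave_lazy_mmu_mode();
 *
 * so that the individual PTE writes are queued into one multicall instead
 * of costing one hypercall each.
 */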

static inline void __xen_set_pte(pte_t *ptep, pte_t pteval)
{
	if (!xen_batched_set_pte(ptep, pteval)) {
		/*
		 * Could call native_set_pte() here and trap and
		 * emulate the PTE write, but a hypercall is much cheaper.
		 */
		struct mmu_update u;

		u.ptr = virt_to_machine(ptep).maddr | MMU_NORMAL_PT_UPDATE;
		u.val = pte_val_ma(pteval);
		HYPERVISOR_mmu_update(&u, 1, NULL, DOMID_SELF);
	}
}

static void xen_set_pte(pte_t *ptep, pte_t pteval)
{
	trace_xen_mmu_set_pte(ptep, pteval);
	__xen_set_pte(ptep, pteval);
}

pte_t xen_ptep_modify_prot_start(struct vm_area_struct *vma,
				 unsigned long addr, pte_t *ptep)
{
	/* Just return the pte as-is.  We preserve the bits on commit */
	trace_xen_mmu_ptep_modify_prot_start(vma->vm_mm, addr, ptep, *ptep);
	return *ptep;
}

void xen_ptep_modify_prot_commit(struct vm_area_struct *vma, unsigned long addr,
				 pte_t *ptep, pte_t pte)
{
	struct mmu_update u;

	trace_xen_mmu_ptep_modify_prot_commit(vma->vm_mm, addr, ptep, pte);
	xen_mc_batch();

	u.ptr = virt_to_machine(ptep).maddr | MMU_PT_UPDATE_PRESERVE_AD;
	u.val = pte_val_ma(pte);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}

/* Assume pteval_t is equivalent to all the other *val_t types. */
static pteval_t pte_mfn_to_pfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long mfn = (val & XEN_PTE_MFN_MASK) >> PAGE_SHIFT;
		unsigned long pfn = mfn_to_pfn(mfn);

		pteval_t flags = val & PTE_FLAGS_MASK;
		if (unlikely(pfn == ~0))
			val = flags & ~_PAGE_PRESENT;
		else
			val = ((pteval_t)pfn << PAGE_SHIFT) | flags;
	}

	return val;
}

static pteval_t pte_pfn_to_mfn(pteval_t val)
{
	if (val & _PAGE_PRESENT) {
		unsigned long pfn = (val & PTE_PFN_MASK) >> PAGE_SHIFT;
		pteval_t flags = val & PTE_FLAGS_MASK;
		unsigned long mfn;

		mfn = __pfn_to_mfn(pfn);

		/*
		 * If there's no mfn for the pfn, then just create an
		 * empty non-present pte.  Unfortunately this loses
		 * information about the original pfn, so
		 * pte_mfn_to_pfn is asymmetric.
		 */
		if (unlikely(mfn == INVALID_P2M_ENTRY)) {
			mfn = 0;
			flags = 0;
		} else
			mfn &= ~(FOREIGN_FRAME_BIT | IDENTITY_FRAME_BIT);
		val = ((pteval_t)mfn << PAGE_SHIFT) | flags;
	}

	return val;
}
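
/*
 * Illustrative sketch, not part of the original file: for a present entry
 * whose pfn has a valid machine frame, the two helpers above are inverses
 * of each other; the asymmetry documented in pte_pfn_to_mfn() only shows
 * up when the pfn has no backing mfn.  The helper name is hypothetical.
 */
static inline bool example_pteval_round_trips(pteval_t val)
{
	return pte_mfn_to_pfn(pte_pfn_to_mfn(val)) == val;
}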

__visible pteval_t xen_pte_val(pte_t pte)
{
	pteval_t pteval = pte.pte;

	return pte_mfn_to_pfn(pteval);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pte_val);

__visible pgdval_t xen_pgd_val(pgd_t pgd)
{
	return pte_mfn_to_pfn(pgd.pgd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pgd_val);

__visible pte_t xen_make_pte(pteval_t pte)
{
	pte = pte_pfn_to_mfn(pte);

	return native_make_pte(pte);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte);

__visible pgd_t xen_make_pgd(pgdval_t pgd)
{
	pgd = pte_pfn_to_mfn(pgd);
	return native_make_pgd(pgd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pgd);

__visible pmdval_t xen_pmd_val(pmd_t pmd)
{
	return pte_mfn_to_pfn(pmd.pmd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pmd_val);

static void xen_set_pud_hyper(pud_t *ptr, pud_t val)
{
	struct mmu_update u;

	preempt_disable();

	xen_mc_batch();

	/* ptr may be ioremapped for 64-bit pagetable setup */
	u.ptr = arbitrary_virt_to_machine(ptr).maddr;
	u.val = pud_val_ma(val);
	xen_extend_mmu_update(&u);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_pud(pud_t *ptr, pud_t val)
{
	trace_xen_mmu_set_pud(ptr, val);

	/* If page is not pinned, we can just update the entry
	   directly */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		return;
	}

	xen_set_pud_hyper(ptr, val);
}

__visible pmd_t xen_make_pmd(pmdval_t pmd)
{
	pmd = pte_pfn_to_mfn(pmd);
	return native_make_pmd(pmd);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);

__visible pudval_t xen_pud_val(pud_t pud)
{
	return pte_mfn_to_pfn(pud.pud);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_pud_val);

__visible pud_t xen_make_pud(pudval_t pud)
{
	pud = pte_pfn_to_mfn(pud);

	return native_make_pud(pud);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_pud);

static pgd_t *xen_get_user_pgd(pgd_t *pgd)
{
	pgd_t *pgd_page = (pgd_t *)(((unsigned long)pgd) & PAGE_MASK);
	unsigned offset = pgd - pgd_page;
	pgd_t *user_ptr = NULL;

	if (offset < pgd_index(USER_LIMIT)) {
		struct page *page = virt_to_page(pgd_page);
		user_ptr = (pgd_t *)page->private;
		if (user_ptr)
			user_ptr += offset;
	}

	return user_ptr;
}

static void __xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
{
	struct mmu_update u;

	u.ptr = virt_to_machine(ptr).maddr;
	u.val = p4d_val_ma(val);
	xen_extend_mmu_update(&u);
}

/*
 * Raw hypercall-based set_p4d, intended for use in early boot before
 * there's a page structure.  This implies:
 *  1. The only existing pagetable is the kernel's
 *  2. It is always pinned
 *  3. It has no user pagetable attached to it
 */
static void __init xen_set_p4d_hyper(p4d_t *ptr, p4d_t val)
{
	preempt_disable();

	xen_mc_batch();

	__xen_set_p4d_hyper(ptr, val);

	xen_mc_issue(PARAVIRT_LAZY_MMU);

	preempt_enable();
}

static void xen_set_p4d(p4d_t *ptr, p4d_t val)
{
	pgd_t *user_ptr = xen_get_user_pgd((pgd_t *)ptr);
	pgd_t pgd_val;

	trace_xen_mmu_set_p4d(ptr, (p4d_t *)user_ptr, val);

	/* If page is not pinned, we can just update the entry
	   directly */
	if (!xen_page_pinned(ptr)) {
		*ptr = val;
		if (user_ptr) {
			WARN_ON(xen_page_pinned(user_ptr));
			pgd_val.pgd = p4d_val_ma(val);
			*user_ptr = pgd_val;
		}
		return;
	}

	/* If it's pinned, then we can at least batch the kernel and
	   user updates together. */
	xen_mc_batch();

	__xen_set_p4d_hyper(ptr, val);
	if (user_ptr)
		__xen_set_p4d_hyper((p4d_t *)user_ptr, val);

	xen_mc_issue(PARAVIRT_LAZY_MMU);
}

#if CONFIG_PGTABLE_LEVELS >= 5
__visible p4dval_t xen_p4d_val(p4d_t p4d)
{
	return pte_mfn_to_pfn(p4d.p4d);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_p4d_val);

__visible p4d_t xen_make_p4d(p4dval_t p4d)
{
	p4d = pte_pfn_to_mfn(p4d);

	return native_make_p4d(p4d);
}
PV_CALLEE_SAVE_REGS_THUNK(xen_make_p4d);
#endif  /* CONFIG_PGTABLE_LEVELS >= 5 */

static void xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
			 void (*func)(struct mm_struct *mm, struct page *,
				      enum pt_level),
			 bool last, unsigned long limit)
{
	int i, nr;

	nr = last ? pmd_index(limit) + 1 : PTRS_PER_PMD;
	for (i = 0; i < nr; i++) {
		if (!pmd_none(pmd[i]))
			(*func)(mm, pmd_page(pmd[i]), PT_PTE);
	}
}

static void xen_pud_walk(struct mm_struct *mm, pud_t *pud,
			 void (*func)(struct mm_struct *mm, struct page *,
				      enum pt_level),
			 bool last, unsigned long limit)
{
	int i, nr;

	nr = last ? pud_index(limit) + 1 : PTRS_PER_PUD;
	for (i = 0; i < nr; i++) {
		pmd_t *pmd;

		if (pud_none(pud[i]))
			continue;

		pmd = pmd_offset(&pud[i], 0);
		if (PTRS_PER_PMD > 1)
			(*func)(mm, virt_to_page(pmd), PT_PMD);
		xen_pmd_walk(mm, pmd, func, last && i == nr - 1, limit);
	}
}

static void xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
			 void (*func)(struct mm_struct *mm, struct page *,
				      enum pt_level),
			 bool last, unsigned long limit)
{
	pud_t *pud;


	if (p4d_none(*p4d))
		return;

	pud = pud_offset(p4d, 0);
	if (PTRS_PER_PUD > 1)
		(*func)(mm, virt_to_page(pud), PT_PUD);
	xen_pud_walk(mm, pud, func, last, limit);
}

/*
 * (Yet another) pagetable walker.  This one is intended for pinning a
 * pagetable.  This means that it walks a pagetable and calls the
 * callback function on each page it finds making up the page table,
 * at every level.  It walks the entire pagetable, but it only bothers
 * pinning pte pages which are below limit.  In the normal case this
 * will be STACK_TOP_MAX, but at boot we need to pin up to
 * FIXADDR_TOP.
 *
 * We must skip the Xen hole in the middle of the address space, just after
 * the big x86-64 virtual hole.
 */
static void __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
			   void (*func)(struct mm_struct *mm, struct page *,
					enum pt_level),
			   unsigned long limit)
{
	int i, nr;
	unsigned hole_low = 0, hole_high = 0;

	/* The limit is the last byte to be touched */
	limit--;
	BUG_ON(limit >= FIXADDR_TOP);

	/*
	 * 64-bit has a great big hole in the middle of the address
	 * space, which contains the Xen mappings.
	 */
	hole_low = pgd_index(GUARD_HOLE_BASE_ADDR);
	hole_high = pgd_index(GUARD_HOLE_END_ADDR);

	nr = pgd_index(limit) + 1;
	for (i = 0; i < nr; i++) {
		p4d_t *p4d;

		if (i >= hole_low && i < hole_high)
			continue;

		if (pgd_none(pgd[i]))
			continue;

		p4d = p4d_offset(&pgd[i], 0);
		xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
	}

	/* Do the top level last, so that the callbacks can use it as
	   a cue to do final things like tlb flushes. */
	(*func)(mm, virt_to_page(pgd), PT_PGD);
}

static void xen_pgd_walk(struct mm_struct *mm,
			 void (*func)(struct mm_struct *mm, struct page *,
				      enum pt_level),
			 unsigned long limit)
{
	__xen_pgd_walk(mm, mm->pgd, func, limit);
}
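
/*
 * Illustrative sketch, not part of the original file: a trivial callback in
 * the shape expected by xen_pgd_walk()/__xen_pgd_walk(), counting the
 * pagetable pages visited.  'example_pt_pages' and 'example_count_page'
 * are hypothetical names used only here, e.g.
 *
 *	xen_pgd_walk(&init_mm, example_count_page, USER_LIMIT);
 */
static unsigned long example_pt_pages;

static void __maybe_unused example_count_page(struct mm_struct *mm,
					      struct page *page,
					      enum pt_level level)
{
	example_pt_pages++;
}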
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  642) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  643) /* If we're using split pte locks, then take the page's lock and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  644)    return a pointer to it.  Otherwise return NULL. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  645) static spinlock_t *xen_pte_lock(struct page *page, struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  646) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  647) 	spinlock_t *ptl = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  648) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  649) #if USE_SPLIT_PTE_PTLOCKS
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  650) 	ptl = ptlock_ptr(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  651) 	spin_lock_nest_lock(ptl, &mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  652) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  653) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  654) 	return ptl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  656) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  657) static void xen_pte_unlock(void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  658) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  659) 	spinlock_t *ptl = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  660) 	spin_unlock(ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  661) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  662) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  663) static void xen_do_pin(unsigned level, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  664) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  665) 	struct mmuext_op op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  666) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  667) 	op.cmd = level;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  668) 	op.arg1.mfn = pfn_to_mfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  669) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  670) 	xen_extend_mmuext_op(&op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  671) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  672) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  673) static void xen_pin_page(struct mm_struct *mm, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  674) 			 enum pt_level level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  675) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  676) 	unsigned pgfl = TestSetPagePinned(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  677) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  678) 	if (!pgfl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  679) 		void *pt = lowmem_page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  680) 		unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  681) 		struct multicall_space mcs = __xen_mc_entry(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  682) 		spinlock_t *ptl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  683) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  684) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  685) 		 * We need to hold the pagetable lock between the time
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  686) 		 * we make the pagetable RO and when we actually pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  687) 		 * it.  If we don't, then other users may come in and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  688) 		 * attempt to update the pagetable by writing it,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  689) 		 * which will fail because the memory is RO but not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  690) 		 * pinned, so Xen won't do the trap'n'emulate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  691) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  692) 		 * If we're using split pte locks, we can't hold the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  693) 		 * entire pagetable's worth of locks during the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  694) 		 * traverse, because we may wrap the preempt count (8
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  695) 		 * bits).  The solution is to mark RO and pin each PTE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  696) 		 * page while holding the lock.  This means the number
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  697) 		 * of locks we end up holding is never more than a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  698) 		 * batch size (~32 entries, at present).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  699) 		 *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  700) 		 * If we're not using split pte locks, we needn't pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  701) 		 * the PTE pages independently, because we're
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  702) 		 * protected by the overall pagetable lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  703) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  704) 		ptl = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  705) 		if (level == PT_PTE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  706) 			ptl = xen_pte_lock(page, mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  707) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  708) 		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  709) 					pfn_pte(pfn, PAGE_KERNEL_RO),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  710) 					level == PT_PGD ? UVMF_TLB_FLUSH : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  712) 		if (ptl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  713) 			xen_do_pin(MMUEXT_PIN_L1_TABLE, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  714) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  715) 			/* Queue a deferred unlock for when this batch
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  716) 			   is completed. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  717) 			xen_mc_callback(xen_pte_unlock, ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  718) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  719) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  721) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  722) /* This is called just after a mm has been created, but it has not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  723)    been used yet.  We need to make sure that its pagetable is all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  724)    read-only, and can be pinned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  725) static void __xen_pgd_pin(struct mm_struct *mm, pgd_t *pgd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  726) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  727) 	pgd_t *user_pgd = xen_get_user_pgd(pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  729) 	trace_xen_mmu_pgd_pin(mm, pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  730) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  731) 	xen_mc_batch();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  732) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  733) 	__xen_pgd_walk(mm, pgd, xen_pin_page, USER_LIMIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  734) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  735) 	xen_do_pin(MMUEXT_PIN_L4_TABLE, PFN_DOWN(__pa(pgd)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  736) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  737) 	if (user_pgd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  738) 		xen_pin_page(mm, virt_to_page(user_pgd), PT_PGD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  739) 		xen_do_pin(MMUEXT_PIN_L4_TABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  740) 			   PFN_DOWN(__pa(user_pgd)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  741) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  742) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  743) 	xen_mc_issue(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  744) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  746) static void xen_pgd_pin(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  747) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  748) 	__xen_pgd_pin(mm, mm->pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  749) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  750) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  751) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  752)  * On save, we need to pin all pagetables to make sure they get their
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  753)  * mfns turned into pfns.  Search the list for any unpinned pgds and pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  754)  * them (unpinned pgds are not currently in use, probably because the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  755)  * process is under construction or destruction).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  756)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  757)  * Expected to be called in stop_machine() ("equivalent to taking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  758)  * every spinlock in the system"), so the locking doesn't really
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  759)  * matter all that much.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  760)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  761) void xen_mm_pin_all(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  762) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  763) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  764) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  765) 	spin_lock(&pgd_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  767) 	list_for_each_entry(page, &pgd_list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  768) 		if (!PagePinned(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  769) 			__xen_pgd_pin(&init_mm, (pgd_t *)page_address(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  770) 			SetPageSavePinned(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  771) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  772) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  773) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  774) 	spin_unlock(&pgd_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  775) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  776) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  777) static void __init xen_mark_pinned(struct mm_struct *mm, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  778) 				   enum pt_level level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  779) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  780) 	SetPagePinned(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  781) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  782) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  783) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  784)  * The init_mm pagetable is really pinned as soon as its created, but
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  785)  * that's before we have page structures to store the bits.  So do all
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  786)  * the book-keeping now once struct pages for allocated pages are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  787)  * initialized. This happens only after memblock_free_all() is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  788)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  789) static void __init xen_after_bootmem(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  790) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  791) 	static_branch_enable(&xen_struct_pages_ready);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  792) 	SetPagePinned(virt_to_page(level3_user_vsyscall));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  793) 	xen_pgd_walk(&init_mm, xen_mark_pinned, FIXADDR_TOP);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  794) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  795) 
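/*
 * Per-page counterpart of the pinning callback: clear the Pinned flag and
 * queue a batched remap of the pagetable page back to normal RW (with a
 * TLB flush for the top-level page).
 */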
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  796) static void xen_unpin_page(struct mm_struct *mm, struct page *page,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  797) 			   enum pt_level level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  798) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  799) 	unsigned pgfl = TestClearPagePinned(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  801) 	if (pgfl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  802) 		void *pt = lowmem_page_address(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  803) 		unsigned long pfn = page_to_pfn(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  804) 		spinlock_t *ptl = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  805) 		struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  807) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  808) 		 * Do the converse to pin_page.  If we're using split
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  809) 		 * pte locks, we must be holding the lock while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  810) 		 * the pte page is unpinned but still RO to prevent
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  811) 		 * concurrent updates from seeing it in this
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  812) 		 * partially-pinned state.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  813) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  814) 		if (level == PT_PTE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  815) 			ptl = xen_pte_lock(page, mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  816) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  817) 			if (ptl)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  818) 				xen_do_pin(MMUEXT_UNPIN_TABLE, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  819) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  820) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  821) 		mcs = __xen_mc_entry(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  822) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  823) 		MULTI_update_va_mapping(mcs.mc, (unsigned long)pt,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  824) 					pfn_pte(pfn, PAGE_KERNEL),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  825) 					level == PT_PGD ? UVMF_TLB_FLUSH : 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  826) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  827) 		if (ptl) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  828) 			/* unlock when batch completed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  829) 			xen_mc_callback(xen_pte_unlock, ptl);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  830) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  831) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  832) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  833) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  834) /* Release a pagetable's pages back as normal RW */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  835) static void __xen_pgd_unpin(struct mm_struct *mm, pgd_t *pgd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  836) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  837) 	pgd_t *user_pgd = xen_get_user_pgd(pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  838) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  839) 	trace_xen_mmu_pgd_unpin(mm, pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  840) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  841) 	xen_mc_batch();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  843) 	xen_do_pin(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  844) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  845) 	if (user_pgd) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  846) 		xen_do_pin(MMUEXT_UNPIN_TABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  847) 			   PFN_DOWN(__pa(user_pgd)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  848) 		xen_unpin_page(mm, virt_to_page(user_pgd), PT_PGD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  849) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  850) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  851) 	__xen_pgd_walk(mm, pgd, xen_unpin_page, USER_LIMIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  852) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  853) 	xen_mc_issue(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  854) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  855) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  856) static void xen_pgd_unpin(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  858) 	__xen_pgd_unpin(mm, mm->pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  859) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  860) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  861) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  862)  * On resume, undo any pinning done at save, so that the rest of the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  863)  * kernel doesn't see any unexpected pinned pagetables.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  864)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  865) void xen_mm_unpin_all(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  866) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  867) 	struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  868) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  869) 	spin_lock(&pgd_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  870) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  871) 	list_for_each_entry(page, &pgd_list, lru) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  872) 		if (PageSavePinned(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  873) 			BUG_ON(!PagePinned(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  874) 			__xen_pgd_unpin(&init_mm, (pgd_t *)page_address(page));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  875) 			ClearPageSavePinned(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  876) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  877) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  878) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  879) 	spin_unlock(&pgd_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  880) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  881) 
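/*
 * activate_mm()/dup_mmap() hooks: pin the mm's pagetable (under its
 * page_table_lock) so that Xen treats it as a validated pagetable from
 * here on.
 */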
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  882) static void xen_activate_mm(struct mm_struct *prev, struct mm_struct *next)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  883) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  884) 	spin_lock(&next->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  885) 	xen_pgd_pin(next);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  886) 	spin_unlock(&next->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  887) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  888) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  889) static void xen_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  890) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  891) 	spin_lock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  892) 	xen_pgd_pin(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  893) 	spin_unlock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  894) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  895) 
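/*
 * Runs on a single CPU (directly or via IPI): switch away from the given
 * mm if it is currently loaded, and flush any pending multicalls that
 * still hold a stale cr3 reference to it.
 */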
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  896) static void drop_mm_ref_this_cpu(void *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  898) 	struct mm_struct *mm = info;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  899) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  900) 	if (this_cpu_read(cpu_tlbstate.loaded_mm) == mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  901) 		leave_mm(smp_processor_id());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  902) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  903) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  904) 	 * If this cpu still has a stale cr3 reference, then make sure
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  905) 	 * it has been flushed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  906) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  907) 	if (this_cpu_read(xen_current_cr3) == __pa(mm->pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  908) 		xen_mc_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  909) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  910) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  911) #ifdef CONFIG_SMP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  912) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  913)  * Another cpu may still have their %cr3 pointing at the pagetable, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  914)  * we need to repoint it somewhere else before we can unpin it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  915)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  916) static void xen_drop_mm_ref(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  917) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  918) 	cpumask_var_t mask;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  919) 	unsigned cpu;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  920) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  921) 	drop_mm_ref_this_cpu(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  922) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  923) 	/* Get the "official" set of cpus referring to our pagetable. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  924) 	if (!alloc_cpumask_var(&mask, GFP_ATOMIC)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  925) 		for_each_online_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  926) 			if (per_cpu(xen_current_cr3, cpu) != __pa(mm->pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  927) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  928) 			smp_call_function_single(cpu, drop_mm_ref_this_cpu, mm, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  929) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  930) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  931) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  932) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  933) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  934) 	 * It's possible that a vcpu may have a stale reference to our
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  935) 	 * cr3, because it's in lazy mode and it hasn't yet flushed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  936) 	 * its set of pending hypercalls.  In this case, we can
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  937) 	 * look at its actual current cr3 value, and force it to flush
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  938) 	 * if needed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  939) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  940) 	cpumask_clear(mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  941) 	for_each_online_cpu(cpu) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  942) 		if (per_cpu(xen_current_cr3, cpu) == __pa(mm->pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  943) 			cpumask_set_cpu(cpu, mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  944) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  946) 	smp_call_function_many(mask, drop_mm_ref_this_cpu, mm, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  947) 	free_cpumask_var(mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  948) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  949) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  950) static void xen_drop_mm_ref(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  951) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  952) 	drop_mm_ref_this_cpu(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  953) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  954) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  955) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  956) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  957)  * While a process runs, Xen pins its pagetables, which means that the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  958)  * hypervisor forces it to be read-only, and it controls all updates
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  959)  * to it.  This means that all pagetable updates have to go via the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  960)  * hypervisor, which is moderately expensive.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  961)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  962)  * Since we're pulling the pagetable down, we switch to using init_mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  963)  * unpin the old process's pagetable and mark it all read-write, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  964)  * allows further operations on it to be simple memory accesses.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  965)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  966)  * The only subtle point is that another CPU may be still using the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  967)  * pagetable because of lazy tlb flushing.  This means we need to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  968)  * switch all CPUs off this pagetable before we can unpin it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  969)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  970) static void xen_exit_mmap(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  971) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  972) 	get_cpu();		/* make sure we don't move around */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  973) 	xen_drop_mm_ref(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  974) 	put_cpu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  975) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  976) 	spin_lock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  977) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  978) 	/* pgd may not be pinned in the error exit path of execve */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  979) 	if (xen_page_pinned(mm->pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  980) 		xen_pgd_unpin(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  981) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  982) 	spin_unlock(&mm->page_table_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  984) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  985) static void xen_post_allocator_init(void);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  986) 
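/*
 * Issue a single MMUEXT (un)pin operation for the pagetable frame at @pfn,
 * synchronously and without batching; only used during early boot.
 */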
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  987) static void __init pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  988) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  989) 	struct mmuext_op op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  990) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  991) 	op.cmd = cmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  992) 	op.arg1.mfn = pfn_to_mfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  993) 	if (HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  994) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  995) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  996) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  997) static void __init xen_cleanhighmap(unsigned long vaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  998) 				    unsigned long vaddr_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300  999) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) 	unsigned long kernel_end = roundup((unsigned long)_brk_end, PMD_SIZE) - 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) 	pmd_t *pmd = level2_kernel_pgt + pmd_index(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) 	/* NOTE: The loop is more greedy than the cleanup_highmap variant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) 	 * We include the PMD passed in on _both_ boundaries. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) 	for (; vaddr <= vaddr_end && (pmd < (level2_kernel_pgt + PTRS_PER_PMD));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) 			pmd++, vaddr += PMD_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) 		if (pmd_none(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) 		if (vaddr < (unsigned long) _text || vaddr > kernel_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) 			set_pmd(pmd, __pmd(0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) 	/* In case we did something silly, we should crash in this function
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) 	 * instead of somewhere later, where it would be confusing. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) 	xen_mc_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018)  * Make a page range writeable and free it.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) static void __init xen_free_ro_pages(unsigned long paddr, unsigned long size)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) 	void *vaddr = __va(paddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) 	void *vaddr_end = vaddr + size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) 	for (; vaddr < vaddr_end; vaddr += PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) 		make_lowmem_page_readwrite(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) 	memblock_free(paddr, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) 
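/*
 * The xen_cleanmfnmap_*() helpers below tear down the pagetables mapping
 * the initial P->M list: each level frees the data pages it maps and then
 * releases its own pagetable page back as normal RW memory, unpinning it
 * first where necessary.
 */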
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) static void __init xen_cleanmfnmap_free_pgtbl(void *pgtbl, bool unpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) 	unsigned long pa = __pa(pgtbl) & PHYSICAL_PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) 	if (unpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) 		pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(pa));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) 	ClearPagePinned(virt_to_page(__va(pa)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) 	xen_free_ro_pages(pa, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) static void __init xen_cleanmfnmap_pmd(pmd_t *pmd, bool unpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) 	unsigned long pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) 	pte_t *pte_tbl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) 	if (pmd_large(*pmd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) 		pa = pmd_val(*pmd) & PHYSICAL_PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) 		xen_free_ro_pages(pa, PMD_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) 	pte_tbl = pte_offset_kernel(pmd, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) 	for (i = 0; i < PTRS_PER_PTE; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) 		if (pte_none(pte_tbl[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057) 		pa = pte_pfn(pte_tbl[i]) << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) 		xen_free_ro_pages(pa, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060) 	set_pmd(pmd, __pmd(0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) 	xen_cleanmfnmap_free_pgtbl(pte_tbl, unpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064) static void __init xen_cleanmfnmap_pud(pud_t *pud, bool unpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) 	unsigned long pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) 	pmd_t *pmd_tbl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) 	if (pud_large(*pud)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) 		pa = pud_val(*pud) & PHYSICAL_PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072) 		xen_free_ro_pages(pa, PUD_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) 	pmd_tbl = pmd_offset(pud, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077) 	for (i = 0; i < PTRS_PER_PMD; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) 		if (pmd_none(pmd_tbl[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) 		xen_cleanmfnmap_pmd(pmd_tbl + i, unpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) 	set_pud(pud, __pud(0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) 	xen_cleanmfnmap_free_pgtbl(pmd_tbl, unpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) static void __init xen_cleanmfnmap_p4d(p4d_t *p4d, bool unpin)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) 	unsigned long pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) 	pud_t *pud_tbl;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) 	if (p4d_large(*p4d)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) 		pa = p4d_val(*p4d) & PHYSICAL_PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) 		xen_free_ro_pages(pa, P4D_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098) 	pud_tbl = pud_offset(p4d, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) 	for (i = 0; i < PTRS_PER_PUD; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) 		if (pud_none(pud_tbl[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) 		xen_cleanmfnmap_pud(pud_tbl + i, unpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) 	set_p4d(p4d, __p4d(0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) 	xen_cleanmfnmap_free_pgtbl(pud_tbl, unpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109)  * Since it is well isolated we can (and since it is perhaps large we should)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110)  * also free the page tables mapping the initial P->M table.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) static void __init xen_cleanmfnmap(unsigned long vaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) 	pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) 	p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) 	bool unpin;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) 	unpin = (vaddr == 2 * PGDIR_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) 	vaddr &= PMD_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) 	pgd = pgd_offset_k(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) 	p4d = p4d_offset(pgd, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) 	if (!p4d_none(*p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) 		xen_cleanmfnmap_p4d(p4d, unpin);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) 
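/*
 * Release the initial, linearly-mapped p2m list once the virtually-mapped
 * p2m tree has taken over: poison it with INVALID_P2M_ENTRY and give the
 * memory (and, if needed, the pagetables mapping it) back.
 */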
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126) static void __init xen_pagetable_p2m_free(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) 	unsigned long size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) 	unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) 	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) 	/* No memory or already called. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) 	if ((unsigned long)xen_p2m_addr == xen_start_info->mfn_list)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) 	/* Using the __ka address, fill the list with INVALID_P2M_ENTRY. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) 	memset((void *)xen_start_info->mfn_list, 0xff, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) 	addr = xen_start_info->mfn_list;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) 	 * We could be in __ka space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) 	 * We round up to the PMD, which means that if anybody at this stage is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) 	 * using the __ka address of xen_start_info or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) 	 * xen_start_info->shared_info they are going to crash. Fortunately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) 	 * we have already revectored in xen_setup_kernel_pagetable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) 	size = roundup(size, PMD_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) 	if (addr >= __START_KERNEL_map) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) 		xen_cleanhighmap(addr, addr + size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) 		size = PAGE_ALIGN(xen_start_info->nr_pages *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) 				  sizeof(unsigned long));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) 		memblock_free(__pa(addr), size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) 		xen_cleanmfnmap(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) static void __init xen_pagetable_cleanhighmap(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) 	unsigned long size;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) 	unsigned long addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) 	/* At this stage, cleanup_highmap has already cleaned __ka space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) 	 * from _brk_limit way up to the max_pfn_mapped (which is the end of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) 	 * the ramdisk). We continue on, erasing PMD entries that point to page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) 	 * tables - do note that they are accessible at this stage via __va.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) 	 * As Xen is aligning the memory end to a 4MB boundary, for good
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) 	 * measure we also round up to PMD_SIZE * 2 - which means that if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) 	 * anybody still using a __ka address for the initial boot-stack - and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) 	 * trying to use it - is going to crash. The xen_start_info has been
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) 	 * taken care of already in xen_setup_kernel_pagetable. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) 	addr = xen_start_info->pt_base;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) 	size = xen_start_info->nr_pt_frames * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177) 	xen_cleanhighmap(addr, roundup(addr + size, PMD_SIZE * 2));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) 	xen_start_info->pt_base = (unsigned long)__va(__pa(xen_start_info->pt_base));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) 
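/*
 * Switch the p2m bookkeeping over to the virtually-mapped tree, free the
 * boot-time structures and point mfn_list at the new array.
 */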
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) static void __init xen_pagetable_p2m_setup(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) 	xen_vmalloc_p2m_tree();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) 	xen_pagetable_p2m_free();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187) 	xen_pagetable_cleanhighmap();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) 	/* And revector! Bye bye old array */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) 	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) static void __init xen_pagetable_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) 	paging_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) 	xen_post_allocator_init();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) 	xen_pagetable_p2m_setup();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200) 	/* Allocate and initialize top and mid mfn levels for p2m structure */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) 	xen_build_mfn_list_list();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) 	/* Remap memory freed due to conflicts with E820 map */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) 	xen_remap_memory();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) 	xen_setup_mfn_list_list();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) }
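
/*
 * cr2 is virtualized via the shared vcpu_info area; a write just updates
 * the value recorded there.
 */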
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) static void xen_write_cr2(unsigned long cr2)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) 	this_cpu_read(xen_vcpu)->arch.cr2 = cr2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) 
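/*
 * TLB flushes are MMUEXT hypercalls; queue them as multicalls and issue
 * them in PARAVIRT_LAZY_MMU mode so they can be coalesced with other
 * pending pagetable updates.
 */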
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) static noinline void xen_flush_tlb(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) 	struct mmuext_op *op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) 	struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) 	preempt_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) 	mcs = xen_mc_entry(sizeof(*op));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) 	op = mcs.args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) 	op->cmd = MMUEXT_TLB_FLUSH_LOCAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) 	xen_mc_issue(PARAVIRT_LAZY_MMU);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) 	preempt_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) static void xen_flush_tlb_one_user(unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) 	struct mmuext_op *op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) 	struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) 	trace_xen_mmu_flush_tlb_one_user(addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) 	preempt_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) 	mcs = xen_mc_entry(sizeof(*op));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) 	op = mcs.args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241) 	op->cmd = MMUEXT_INVLPG_LOCAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) 	op->arg1.linear_addr = addr & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) 	xen_mc_issue(PARAVIRT_LAZY_MMU);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) 	preempt_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) static void xen_flush_tlb_others(const struct cpumask *cpus,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) 				 const struct flush_tlb_info *info)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253) 	struct {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) 		struct mmuext_op op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) 		DECLARE_BITMAP(mask, NR_CPUS);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) 	} *args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) 	struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258) 	const size_t mc_entry_size = sizeof(args->op) +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) 		sizeof(args->mask[0]) * BITS_TO_LONGS(num_possible_cpus());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) 	trace_xen_mmu_flush_tlb_others(cpus, info->mm, info->start, info->end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263) 	if (cpumask_empty(cpus))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) 		return;		/* nothing to do */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) 	mcs = xen_mc_entry(mc_entry_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) 	args = mcs.args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) 	args->op.arg2.vcpumask = to_cpumask(args->mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270) 	/* Remove us, and any offline CPUS. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) 	cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) 	cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274) 	args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) 	if (info->end != TLB_FLUSH_ALL &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) 	    (info->end - info->start) <= PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) 		args->op.cmd = MMUEXT_INVLPG_MULTI;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) 		args->op.arg1.linear_addr = info->start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) 	MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) 	xen_mc_issue(PARAVIRT_LAZY_MMU);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) static unsigned long xen_read_cr3(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) 	return this_cpu_read(xen_cr3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) static void set_current_cr3(void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) 	this_cpu_write(xen_current_cr3, (unsigned long)v);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) 
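/*
 * Queue a MMUEXT_NEW_BASEPTR (kernel) or MMUEXT_NEW_USER_BASEPTR (user)
 * switch to @cr3.  xen_cr3 is updated immediately; xen_current_cr3 is only
 * updated (via set_current_cr3) once the batch has actually been issued.
 */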
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296) static void __xen_write_cr3(bool kernel, unsigned long cr3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) 	struct mmuext_op op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299) 	unsigned long mfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) 	trace_xen_mmu_write_cr3(kernel, cr3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) 	if (cr3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) 		mfn = pfn_to_mfn(PFN_DOWN(cr3));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) 		mfn = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) 	WARN_ON(mfn == 0 && kernel);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) 	op.cmd = kernel ? MMUEXT_NEW_BASEPTR : MMUEXT_NEW_USER_BASEPTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311) 	op.arg1.mfn = mfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) 	xen_extend_mmuext_op(&op);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) 	if (kernel) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) 		this_cpu_write(xen_cr3, cr3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) 		/* Update xen_current_cr3 once the batch has actually
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) 		   been submitted. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320) 		xen_mc_callback(set_current_cr3, (void *)cr3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) }
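
/*
 * A cr3 write has to switch both the kernel pagetable and, if present,
 * the separate user-mode pagetable.
 */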
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) static void xen_write_cr3(unsigned long cr3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) 	pgd_t *user_pgd = xen_get_user_pgd(__va(cr3));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) 	BUG_ON(preemptible());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) 	xen_mc_batch();  /* disables interrupts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) 	/* Update while interrupts are disabled, so it's atomic with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) 	   respect to IPIs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) 	this_cpu_write(xen_cr3, cr3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) 	__xen_write_cr3(true, cr3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) 	if (user_pgd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) 		__xen_write_cr3(false, __pa(user_pgd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) 		__xen_write_cr3(false, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346)  * At the start of the day - when Xen launches a guest, it has already
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347)  * built pagetables for the guest. We diligently look over them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348)  * in xen_setup_kernel_pagetable and graft them as appropriate into the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349)  * init_top_pgt and its friends. Then when we are happy we load
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)  * the new init_top_pgt - and continue on.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352)  * The generic code starts (start_kernel) and 'init_mem_mapping' sets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353)  * up the rest of the pagetables. When it has completed it loads the cr3.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354)  * N.B. that baremetal would start at 'start_kernel' (and the early
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355)  * #PF handler would create bootstrap pagetables) - so we are running
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356)  * under the same assumptions about what to do when write_cr3 is executed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357)  * at this point.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)  * Since there are no user-page tables at all, we have two variants
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360)  * of xen_write_cr3 - the early bootup (this one), and the late one
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361)  * (xen_write_cr3). The reason we have to do that is that in 64-bit
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362)  * the Linux kernel and user-space are both in ring 3 while the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363)  * hypervisor is in ring 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365) static void __init xen_write_cr3_init(unsigned long cr3)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) 	BUG_ON(preemptible());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) 	xen_mc_batch();  /* disables interrupts */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) 	/* Update while interrupts are disabled, so it's atomic with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) 	   respect to IPIs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373) 	this_cpu_write(xen_cr3, cr3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) 	__xen_write_cr3(true, cr3);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) 	xen_mc_issue(PARAVIRT_LAZY_CPU);  /* interrupts restored */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) 
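/*
 * 64-bit PV guests use a second pgd for user mode.  Allocate it here and
 * stash it in page->private of the kernel pgd's struct page, where it is
 * later retrieved via xen_get_user_pgd().
 */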
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) static int xen_pgd_alloc(struct mm_struct *mm)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) 	pgd_t *pgd = mm->pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) 	struct page *page = virt_to_page(pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) 	pgd_t *user_pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) 	int ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) 	BUG_ON(PagePinned(virt_to_page(pgd)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) 	BUG_ON(page->private != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) 	user_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) 	page->private = (unsigned long)user_pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) 	if (user_pgd != NULL) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) #ifdef CONFIG_X86_VSYSCALL_EMULATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) 		user_pgd[pgd_index(VSYSCALL_ADDR)] =
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) 			__pgd(__pa(level3_user_vsyscall) | _PAGE_TABLE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) 		ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) 	BUG_ON(PagePinned(virt_to_page(xen_get_user_pgd(pgd))));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) 	return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406) static void xen_pgd_free(struct mm_struct *mm, pgd_t *pgd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) 	pgd_t *user_pgd = xen_get_user_pgd(pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) 	if (user_pgd)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) 		free_page((unsigned long)user_pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415)  * Init-time set_pte while constructing initial pagetables, which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416)  * doesn't allow RO page table pages to be remapped RW.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418)  * If there is no MFN for this PFN then this page is initially
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419)  * ballooned out so clear the PTE (as in decrease_reservation() in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420)  * drivers/xen/balloon.c).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422)  * Many of these PTE updates are done on unpinned and writable pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423)  * and doing a hypercall for these is unnecessary and expensive.  At
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424)  * this point it is not possible to tell if a page is pinned or not,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425)  * so always write the PTE directly and rely on Xen trapping and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426)  * emulating any updates as necessary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) __visible pte_t xen_make_pte_init(pteval_t pte)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) 	unsigned long pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433) 	 * Pages belonging to the initial p2m list mapped outside the default
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) 	 * address range must be mapped read-only. This region contains the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) 	 * page tables for mapping the p2m list, too, and page tables MUST be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) 	 * mapped read-only.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) 	pfn = (pte & PTE_PFN_MASK) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439) 	if (xen_start_info->mfn_list < __START_KERNEL_map &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) 	    pfn >= xen_start_info->first_p2m_pfn &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) 	    pfn < xen_start_info->first_p2m_pfn + xen_start_info->nr_p2m_frames)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) 		pte &= ~_PAGE_RW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) 	pte = pte_pfn_to_mfn(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) 	return native_make_pte(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) PV_CALLEE_SAVE_REGS_THUNK(xen_make_pte_init);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) static void __init xen_set_pte_init(pte_t *ptep, pte_t pte)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) 	__xen_set_pte(ptep, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) /* Early in boot, while setting up the initial pagetable, assume
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455)    everything is pinned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) static void __init xen_alloc_pte_init(struct mm_struct *mm, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) #ifdef CONFIG_FLATMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) 	BUG_ON(mem_map);	/* should only be used early */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) 	pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465) /* Used for pmd and pud */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) static void __init xen_alloc_pmd_init(struct mm_struct *mm, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) #ifdef CONFIG_FLATMEM
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) 	BUG_ON(mem_map);	/* should only be used early */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) 	make_lowmem_page_readonly(__va(PFN_PHYS(pfn)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) /* Early release_pte assumes that all pts are pinned, since there's
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475)    only init_mm and anything attached to that is pinned. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) static void __init xen_release_pte_init(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482) static void __init xen_release_pmd_init(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) 	make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) 
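/*
 * Batched helpers for the alloc/release hooks below: queue a single
 * (un)pin op or an RO/RW remap of one frame without issuing the batch.
 */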
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) static inline void __pin_pagetable_pfn(unsigned cmd, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) 	struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) 	struct mmuext_op *op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) 	mcs = __xen_mc_entry(sizeof(*op));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) 	op = mcs.args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) 	op->cmd = cmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) 	op->arg1.mfn = pfn_to_mfn(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) 	MULTI_mmuext_op(mcs.mc, mcs.args, 1, NULL, DOMID_SELF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) static inline void __set_pfn_prot(unsigned long pfn, pgprot_t prot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) 	struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) 	unsigned long addr = (unsigned long)__va(pfn << PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) 	mcs = __xen_mc_entry(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) 	MULTI_update_va_mapping(mcs.mc, (unsigned long)addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) 				pfn_pte(pfn, prot), 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) /* This needs to make sure the new pte page is pinned iff it's being
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511)    attached to a pinned pagetable. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) static inline void xen_alloc_ptpage(struct mm_struct *mm, unsigned long pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) 				    unsigned level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) 	bool pinned = xen_page_pinned(mm->pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) 	trace_xen_mmu_alloc_ptpage(mm, pfn, level, pinned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) 	if (pinned) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) 		struct page *page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) 		if (static_branch_likely(&xen_struct_pages_ready))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) 			SetPagePinned(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) 		xen_mc_batch();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) 		__set_pfn_prot(pfn, PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) 		if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) 			__pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) 		xen_mc_issue(PARAVIRT_LAZY_MMU);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) static void xen_alloc_pte(struct mm_struct *mm, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) 	xen_alloc_ptpage(mm, pfn, PT_PTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) static void xen_alloc_pmd(struct mm_struct *mm, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) 	xen_alloc_ptpage(mm, pfn, PT_PMD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) /* This should never happen until we're OK to use struct page */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547) static inline void xen_release_ptpage(unsigned long pfn, unsigned level)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) 	struct page *page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) 	bool pinned = PagePinned(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552) 	trace_xen_mmu_release_ptpage(pfn, level, pinned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) 	if (pinned) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) 		xen_mc_batch();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) 		if (level == PT_PTE && USE_SPLIT_PTE_PTLOCKS)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) 			__pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) 		__set_pfn_prot(pfn, PAGE_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562) 		xen_mc_issue(PARAVIRT_LAZY_MMU);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564) 		ClearPagePinned(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) static void xen_release_pte(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) 	xen_release_ptpage(pfn, PT_PTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) static void xen_release_pmd(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) 	xen_release_ptpage(pfn, PT_PMD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) 	xen_alloc_ptpage(mm, pfn, PT_PUD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) static void xen_release_pud(unsigned long pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) 	xen_release_ptpage(pfn, PT_PUD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) }
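/*
 * The release hooks mirror the alloc path: unpin the L1 table if it was
 * pinned individually, flip the frame back to PAGE_KERNEL so it can be
 * reused as ordinary memory, and only then clear the Pinned flag.
 */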
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589)  * Like __va(), but returns address in the kernel mapping (which is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590)  * all we have until the physical memory mapping has been set up).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) static void * __init __ka(phys_addr_t paddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) 	return (void *)(paddr + __START_KERNEL_map);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) /* Convert a machine address to physical address */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) static unsigned long __init m2p(phys_addr_t maddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) 	phys_addr_t paddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) 	maddr &= XEN_PTE_MFN_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) 	paddr = mfn_to_pfn(maddr >> PAGE_SHIFT) << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) 	return paddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) /* Convert a machine address to kernel virtual */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) static void * __init m2v(phys_addr_t maddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) 	return __ka(m2p(maddr));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) }
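/*
 * m2p() masks off the PTE flag bits, runs the mfn through the M2P table
 * (mfn_to_pfn) and shifts the result back up into a physical address;
 * m2v() composes that with __ka() to get a kernel-mapping virtual address
 * for an entry taken straight from a Xen-provided pagetable.
 */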
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) /* Set the page permissions on identity-mapped pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) static void __init set_page_prot_flags(void *addr, pgprot_t prot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) 				       unsigned long flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) 	unsigned long pfn = __pa(addr) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619) 	pte_t pte = pfn_pte(pfn, prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) 	if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) static void __init set_page_prot(void *addr, pgprot_t prot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) 	return set_page_prot_flags(addr, prot, UVMF_NONE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) void __init xen_setup_machphys_mapping(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) 	struct xen_machphys_mapping mapping;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) 	if (HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) 		machine_to_phys_mapping = (unsigned long *)mapping.v_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) 		machine_to_phys_nr = mapping.max_mfn + 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) 		machine_to_phys_nr = MACH2PHYS_NR_ENTRIES;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) static void __init convert_pfn_mfn(void *v)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643) 	pte_t *pte = v;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) 	/* All levels are converted the same way, so just treat them
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) 	   as ptes. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) 	for (i = 0; i < PTRS_PER_PTE; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) 		pte[i] = xen_make_pte(pte[i].pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) 				 unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) 	if (*pt_base == PFN_DOWN(__pa(addr))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) 		set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656) 		clear_page((void *)addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) 		(*pt_base)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) 	if (*pt_end == PFN_DOWN(__pa(addr))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) 		set_page_prot_flags((void *)addr, PAGE_KERNEL, UVMF_INVLPG);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) 		clear_page((void *)addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) 		(*pt_end)--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) }
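/*
 * check_pt_base() trims the Xen-provided pagetable region from either end:
 * if the frame holding addr is currently the first or the last frame of
 * that region, it is remapped read-write, cleared and excluded from the
 * range that gets memblock_reserve()d below.
 */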
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666)  * Set up the initial kernel pagetable.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668)  * We can construct this by grafting the Xen provided pagetable into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669)  * head_64.S's preconstructed pagetables.  We copy the Xen L2's into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670)  * level2_ident_pgt, and level2_kernel_pgt.  This means that only the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671)  * kernel has a physical mapping to start with - but that's enough to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672)  * get __va working.  We need to fill in the rest of the physical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673)  * mapping once some sort of allocator has been set up.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) 	pud_t *l3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) 	pmd_t *l2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) 	unsigned long addr[3];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) 	unsigned long pt_base, pt_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) 	unsigned i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) 	/* max_pfn_mapped is the last pfn mapped in the initial memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) 	 * mappings. Considering that on Xen after the kernel mappings we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) 	 * have the mappings of some pages that don't exist in pfn space, we
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) 	 * set max_pfn_mapped to the last real pfn mapped. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) 	if (xen_start_info->mfn_list < __START_KERNEL_map)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) 		max_pfn_mapped = xen_start_info->first_p2m_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690) 		max_pfn_mapped = PFN_DOWN(__pa(xen_start_info->mfn_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) 	pt_base = PFN_DOWN(__pa(xen_start_info->pt_base));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) 	pt_end = pt_base + xen_start_info->nr_pt_frames;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) 	/* Zap identity mapping */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) 	init_top_pgt[0] = __pgd(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) 	/* Pre-constructed entries are in pfn, so convert to mfn */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) 	/* L4[273] -> level3_ident_pgt  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) 	/* L4[511] -> level3_kernel_pgt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) 	convert_pfn_mfn(init_top_pgt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) 	/* L3_i[0] -> level2_ident_pgt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) 	convert_pfn_mfn(level3_ident_pgt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) 	/* L3_k[510] -> level2_kernel_pgt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) 	/* L3_k[511] -> level2_fixmap_pgt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) 	convert_pfn_mfn(level3_kernel_pgt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) 	/* L3_k[511][508-FIXMAP_PMD_NUM ... 507] -> level1_fixmap_pgt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) 	convert_pfn_mfn(level2_fixmap_pgt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) 	/* We get [511][511] and have Xen's version of level2_kernel_pgt */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) 	l3 = m2v(pgd[pgd_index(__START_KERNEL_map)].pgd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) 	l2 = m2v(l3[pud_index(__START_KERNEL_map)].pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) 	addr[0] = (unsigned long)pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) 	addr[1] = (unsigned long)l3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) 	addr[2] = (unsigned long)l2;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) 	/* Graft it onto L4[273][0]. Note that we are creating an aliasing problem:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) 	 * Both L4[273][0] and L4[511][510] have entries that point to the same
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) 	 * L2 (PMD) tables. Meaning that if you modify it in __va space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722) 	 * it will also be modified in the __ka space! (But if you just
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) 	 * modify the PMD table to point to other PTEs or none, then you
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) 	 * are OK - which is what cleanup_highmap does) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725) 	copy_page(level2_ident_pgt, l2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) 	/* Graft it onto L4[511][510] */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) 	copy_page(level2_kernel_pgt, l2);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) 	 * Zap execute permission from the ident map. Due to the sharing of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) 	 * L1 entries we need to do this in the L2.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) 	if (__supported_pte_mask & _PAGE_NX) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) 		for (i = 0; i < PTRS_PER_PMD; ++i) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) 			if (pmd_none(level2_ident_pgt[i]))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) 				continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737) 			level2_ident_pgt[i] = pmd_set_flags(level2_ident_pgt[i], _PAGE_NX);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) 	/* Copy the initial P->M table mappings if necessary. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) 	i = pgd_index(xen_start_info->mfn_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) 	if (i && i < pgd_index(__START_KERNEL_map))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) 		init_top_pgt[i] = ((pgd_t *)xen_start_info->pt_base)[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) 	/* Make pagetable pieces RO */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) 	set_page_prot(init_top_pgt, PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) 	set_page_prot(level3_ident_pgt, PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) 	set_page_prot(level3_kernel_pgt, PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) 	set_page_prot(level3_user_vsyscall, PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) 	set_page_prot(level2_ident_pgt, PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) 	set_page_prot(level2_kernel_pgt, PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753) 	set_page_prot(level2_fixmap_pgt, PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) 	for (i = 0; i < FIXMAP_PMD_NUM; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756) 		set_page_prot(level1_fixmap_pgt + i * PTRS_PER_PTE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) 			      PAGE_KERNEL_RO);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) 	/* Pin down new L4 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) 	pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) 			  PFN_DOWN(__pa_symbol(init_top_pgt)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) 	/* Unpin Xen-provided one */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) 	pin_pagetable_pfn(MMUEXT_UNPIN_TABLE, PFN_DOWN(__pa(pgd)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) 	 * At this stage there can be no user pgd, and no page structure to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) 	 * attach it to, so make sure we just set kernel pgd.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) 	xen_mc_batch();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) 	__xen_write_cr3(true, __pa(init_top_pgt));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) 	xen_mc_issue(PARAVIRT_LAZY_CPU);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) 	/* We can't rip out the L3 and L2 that easily, as the Xen pagetables
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) 	 * are laid out as [L4], [L1], [L2], [L3], [L1], [L1] ...  for the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) 	 * initial domain, while guests started by the toolstack have them in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) 	 * [L4], [L3], [L2], [L1], [L1] order. So for dom0 we can only rip
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) 	 * out the [L4] (pgd), but for guests we shave off three pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781) 	for (i = 0; i < ARRAY_SIZE(addr); i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) 		check_pt_base(&pt_base, &pt_end, addr[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) 	/* Our (by three pages) smaller Xen pagetable that we are using */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) 	xen_pt_base = PFN_PHYS(pt_base);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786) 	xen_pt_size = (pt_end - pt_base) * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) 	memblock_reserve(xen_pt_base, xen_pt_size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) 	/* Revector the xen_start_info */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) 	xen_start_info = (struct start_info *)__va(__pa(xen_start_info));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794)  * Read a value from a physical address.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) static unsigned long __init xen_read_phys_ulong(phys_addr_t addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) 	unsigned long *vaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) 	unsigned long val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) 	vaddr = early_memremap_ro(addr, sizeof(val));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) 	val = *vaddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) 	early_memunmap(vaddr, sizeof(val));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) 	return val;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808)  * Translate a virtual address to a physical one without relying on mapped
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809)  * page tables. Don't rely on big pages being aligned in (guest) physical
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810)  * space!
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) 	phys_addr_t pa;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) 	pgd_t pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) 	pud_t pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) 	pmd_t pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) 	pte_t pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) 	pa = read_cr3_pa();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) 	pgd = native_make_pgd(xen_read_phys_ulong(pa + pgd_index(vaddr) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) 						       sizeof(pgd)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) 	if (!pgd_present(pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) 	pa = pgd_val(pgd) & PTE_PFN_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) 	pud = native_make_pud(xen_read_phys_ulong(pa + pud_index(vaddr) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) 						       sizeof(pud)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) 	if (!pud_present(pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) 	pa = pud_val(pud) & PTE_PFN_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) 	if (pud_large(pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) 		return pa + (vaddr & ~PUD_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) 	pmd = native_make_pmd(xen_read_phys_ulong(pa + pmd_index(vaddr) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) 						       sizeof(pmd)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) 	if (!pmd_present(pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) 	pa = pmd_val(pmd) & PTE_PFN_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) 	if (pmd_large(pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) 		return pa + (vaddr & ~PMD_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) 	pte = native_make_pte(xen_read_phys_ulong(pa + pte_index(vaddr) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) 						       sizeof(pte)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) 	if (!pte_present(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) 		return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) 	pa = pte_pfn(pte) << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) 	return pa | (vaddr & ~PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) }
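/*
 * This walk reads each pagetable level through xen_read_phys_ulong(), i.e.
 * via temporary early mappings of the physical frames, so it works before
 * the address in question is covered by the kernel's own mappings.  Huge
 * PUD/PMD entries are handled by adding the offset within the large page.
 */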
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853)  * Find a new area for the hypervisor supplied p2m list and relocate the p2m to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)  * this area.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) void __init xen_relocate_p2m(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) 	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859) 	unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) 	int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) 	pte_t *pt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) 	pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) 	pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) 	pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) 	unsigned long *new_p2m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) 	size = PAGE_ALIGN(xen_start_info->nr_pages * sizeof(unsigned long));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868) 	n_pte = roundup(size, PAGE_SIZE) >> PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) 	n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) 	n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) 	n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) 	n_frames = n_pte + n_pt + n_pmd + n_pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) 	new_area = xen_find_free_area(PFN_PHYS(n_frames));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) 	if (!new_area) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) 		xen_raw_console_write("Can't find new memory area for p2m needed due to E820 map conflict\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) 	 * Setup the page tables for addressing the new p2m list.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) 	 * We have asked the hypervisor to map the p2m list at the user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) 	 * PUD_SIZE. It may have done so, or it may have used a kernel space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) 	 * address depending on the Xen version.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) 	 * To avoid any possible virtual address collision, just use
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) 	 * 2 * PUD_SIZE for the new area.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) 	pud_phys = new_area;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) 	pmd_phys = pud_phys + PFN_PHYS(n_pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) 	pt_phys = pmd_phys + PFN_PHYS(n_pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) 	p2m_pfn = PFN_DOWN(pt_phys) + n_pt;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) 	pgd = __va(read_cr3_pa());
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) 	new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) 	for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) 		pud = early_memremap(pud_phys, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) 		clear_page(pud);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) 		for (idx_pmd = 0; idx_pmd < min(n_pmd, PTRS_PER_PUD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) 				idx_pmd++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900) 			pmd = early_memremap(pmd_phys, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) 			clear_page(pmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) 			for (idx_pt = 0; idx_pt < min(n_pt, PTRS_PER_PMD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) 					idx_pt++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) 				pt = early_memremap(pt_phys, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905) 				clear_page(pt);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) 				for (idx_pte = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) 				     idx_pte < min(n_pte, PTRS_PER_PTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) 				     idx_pte++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) 					pt[idx_pte] = pfn_pte(p2m_pfn,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) 							      PAGE_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) 					p2m_pfn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) 				}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) 				n_pte -= PTRS_PER_PTE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) 				early_memunmap(pt, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) 				make_lowmem_page_readonly(__va(pt_phys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) 				pin_pagetable_pfn(MMUEXT_PIN_L1_TABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) 						PFN_DOWN(pt_phys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) 				pmd[idx_pt] = __pmd(_PAGE_TABLE | pt_phys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) 				pt_phys += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) 			n_pt -= PTRS_PER_PMD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) 			early_memunmap(pmd, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) 			make_lowmem_page_readonly(__va(pmd_phys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) 			pin_pagetable_pfn(MMUEXT_PIN_L2_TABLE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) 					PFN_DOWN(pmd_phys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) 			pud[idx_pmd] = __pud(_PAGE_TABLE | pmd_phys);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) 			pmd_phys += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) 		n_pmd -= PTRS_PER_PUD;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) 		early_memunmap(pud, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) 		make_lowmem_page_readonly(__va(pud_phys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) 		pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) 		set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) 		pud_phys += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) 	/* Now copy the old p2m info to the new area. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) 	memcpy(new_p2m, xen_p2m_addr, size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) 	xen_p2m_addr = new_p2m;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) 	/* Release the old p2m list and set new list info. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) 	p2m_pfn = PFN_DOWN(xen_early_virt_to_phys(xen_start_info->mfn_list));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) 	BUG_ON(!p2m_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) 	p2m_pfn_end = p2m_pfn + PFN_DOWN(size);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) 	if (xen_start_info->mfn_list < __START_KERNEL_map) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) 		pfn = xen_start_info->first_p2m_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) 		pfn_end = xen_start_info->first_p2m_pfn +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) 			  xen_start_info->nr_p2m_frames;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) 		set_pgd(pgd + 1, __pgd(0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) 	} else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) 		pfn = p2m_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) 		pfn_end = p2m_pfn_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956) 	memblock_free(PFN_PHYS(pfn), PAGE_SIZE * (pfn_end - pfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) 	while (pfn < pfn_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) 		if (pfn == p2m_pfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) 			pfn = p2m_pfn_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) 			continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962) 		make_lowmem_page_readwrite(__va(PFN_PHYS(pfn)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) 		pfn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) 	xen_start_info->mfn_list = (unsigned long)xen_p2m_addr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) 	xen_start_info->first_p2m_pfn =  PFN_DOWN(new_area);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) 	xen_start_info->nr_p2m_frames = n_frames;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) }
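/*
 * Summary of the relocation above: a fresh PUD/PMD/PTE hierarchy is built
 * from the newly found area and hooked in at 2 * PGDIR_SIZE, the p2m data
 * is copied across, the frames of the old list are handed back via
 * memblock_free(), and xen_start_info is updated to describe the new list.
 */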
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) void __init xen_reserve_special_pages(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) 	phys_addr_t paddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) 	memblock_reserve(__pa(xen_start_info), PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) 	if (xen_start_info->store_mfn) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) 		paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->store_mfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) 		memblock_reserve(paddr, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) 	if (!xen_initial_domain()) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) 		paddr = PFN_PHYS(mfn_to_pfn(xen_start_info->console.domU.mfn));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) 		memblock_reserve(paddr, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) void __init xen_pt_check_e820(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) 	if (xen_is_e820_reserved(xen_pt_base, xen_pt_size)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) 		xen_raw_console_write("Xen hypervisor allocated page table memory conflicts with E820 map\n");
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) 		BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) static unsigned char dummy_mapping[PAGE_SIZE] __page_aligned_bss;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) static void xen_set_fixmap(unsigned idx, phys_addr_t phys, pgprot_t prot)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) 	pte_t pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) 	phys >>= PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002) 	switch (idx) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) 	case FIX_BTMAP_END ... FIX_BTMAP_BEGIN:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) #ifdef CONFIG_X86_VSYSCALL_EMULATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) 	case VSYSCALL_PAGE:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) 		/* All local page mappings */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) 		pte = pfn_pte(phys, prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) #ifdef CONFIG_X86_LOCAL_APIC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) 	case FIX_APIC_BASE:	/* maps dummy local APIC */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) 		pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) #ifdef CONFIG_X86_IO_APIC
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) 	case FIX_IO_APIC_BASE_0 ... FIX_IO_APIC_BASE_END:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) 		 * We just don't map the IO APIC - all access is via
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) 		 * hypercalls.  Keep the address in the pte for reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) 		pte = pfn_pte(PFN_DOWN(__pa(dummy_mapping)), PAGE_KERNEL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) 	case FIX_PARAVIRT_BOOTMAP:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) 		/* This is an MFN, but it isn't an IO mapping from the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) 		   IO domain */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) 		pte = mfn_pte(phys, prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) 	default:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) 		/* By default, set_fixmap is used for hardware mappings */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) 		pte = mfn_pte(phys, prot);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) 		break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) 	__native_set_fixmap(idx, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) #ifdef CONFIG_X86_VSYSCALL_EMULATION
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) 	/* Replicate changes to map the vsyscall page into the user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) 	   pagetable vsyscall mapping. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) 	if (idx == VSYSCALL_PAGE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) 		unsigned long vaddr = __fix_to_virt(idx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) 		set_pte_vaddr_pud(level3_user_vsyscall, vaddr, pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) }
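/*
 * The distinction in the switch above is between RAM-backed local pages,
 * where pfn_pte() lets the pfn be translated through the p2m when the
 * entry is written, and machine frames (hardware or hypervisor-provided),
 * where mfn_pte() must be used so that no translation is applied.
 */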
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) static void __init xen_post_allocator_init(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) 	pv_ops.mmu.set_pte = xen_set_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) 	pv_ops.mmu.set_pmd = xen_set_pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) 	pv_ops.mmu.set_pud = xen_set_pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) 	pv_ops.mmu.set_p4d = xen_set_p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) 	/* This will work as long as patching hasn't happened yet
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) 	   (which it hasn't) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) 	pv_ops.mmu.alloc_pte = xen_alloc_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) 	pv_ops.mmu.alloc_pmd = xen_alloc_pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) 	pv_ops.mmu.release_pte = xen_release_pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) 	pv_ops.mmu.release_pmd = xen_release_pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) 	pv_ops.mmu.alloc_pud = xen_alloc_pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) 	pv_ops.mmu.release_pud = xen_release_pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) 	pv_ops.mmu.make_pte = PV_CALLEE_SAVE(xen_make_pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) 	pv_ops.mmu.write_cr3 = &xen_write_cr3;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) static void xen_leave_lazy_mmu(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073) 	preempt_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) 	xen_mc_flush();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) 	paravirt_leave_lazy_mmu();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) 	preempt_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) static const struct pv_mmu_ops xen_mmu_ops __initconst = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080) 	.read_cr2 = __PV_IS_CALLEE_SAVE(xen_read_cr2),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) 	.write_cr2 = xen_write_cr2,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) 	.read_cr3 = xen_read_cr3,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) 	.write_cr3 = xen_write_cr3_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) 	.flush_tlb_user = xen_flush_tlb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) 	.flush_tlb_kernel = xen_flush_tlb,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) 	.flush_tlb_one_user = xen_flush_tlb_one_user,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) 	.flush_tlb_others = xen_flush_tlb_others,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) 	.tlb_remove_table = tlb_remove_table,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092) 	.pgd_alloc = xen_pgd_alloc,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) 	.pgd_free = xen_pgd_free,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) 	.alloc_pte = xen_alloc_pte_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) 	.release_pte = xen_release_pte_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) 	.alloc_pmd = xen_alloc_pmd_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) 	.release_pmd = xen_release_pmd_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) 	.set_pte = xen_set_pte_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) 	.set_pmd = xen_set_pmd_hyper,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) 	.ptep_modify_prot_start = __ptep_modify_prot_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) 	.ptep_modify_prot_commit = __ptep_modify_prot_commit,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) 	.pte_val = PV_CALLEE_SAVE(xen_pte_val),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107) 	.pgd_val = PV_CALLEE_SAVE(xen_pgd_val),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) 	.make_pte = PV_CALLEE_SAVE(xen_make_pte_init),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) 	.make_pgd = PV_CALLEE_SAVE(xen_make_pgd),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) 	.set_pud = xen_set_pud_hyper,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) 	.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115) 	.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) 	.set_p4d = xen_set_p4d_hyper,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) 	.alloc_pud = xen_alloc_pmd_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) 	.release_pud = xen_release_pmd_init,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) #if CONFIG_PGTABLE_LEVELS >= 5
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) 	.p4d_val = PV_CALLEE_SAVE(xen_p4d_val),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) 	.make_p4d = PV_CALLEE_SAVE(xen_make_p4d),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) 	.activate_mm = xen_activate_mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130) 	.dup_mmap = xen_dup_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) 	.exit_mmap = xen_exit_mmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) 	.lazy_mode = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134) 		.enter = paravirt_enter_lazy_mmu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) 		.leave = xen_leave_lazy_mmu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) 		.flush = paravirt_flush_lazy_mmu,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) 	},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) 	.set_fixmap = xen_set_fixmap,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) };
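/*
 * Note that the table above installs the boot-time variants
 * (xen_alloc_pte_init, xen_set_pte_init, xen_write_cr3_init, ...); they
 * are replaced with the final implementations in xen_post_allocator_init()
 * once the kernel's memory allocator is up.
 */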
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142) void __init xen_init_mmu_ops(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) 	x86_init.paging.pagetable_init = xen_pagetable_init;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145) 	x86_init.hyper.init_after_bootmem = xen_after_bootmem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) 	pv_ops.mmu = xen_mmu_ops;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149) 	memset(dummy_mapping, 0xff, PAGE_SIZE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) /* Protected by xen_reservation_lock. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) #define MAX_CONTIG_ORDER 9 /* 2MB */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154) static unsigned long discontig_frames[1<<MAX_CONTIG_ORDER];
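/*
 * With MAX_CONTIG_ORDER 9 the scratch array holds 1 << 9 = 512 frames,
 * i.e. 2 MiB worth of 4 KiB pages - the largest region that
 * xen_create_contiguous_region()/xen_destroy_contiguous_region() accept.
 */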
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156) #define VOID_PTE (mfn_pte(0, __pgprot(0)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) 				unsigned long *in_frames,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) 				unsigned long *out_frames)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) 	int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) 	struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) 	xen_mc_batch();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) 	for (i = 0; i < (1UL<<order); i++, vaddr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) 		mcs = __xen_mc_entry(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) 		if (in_frames)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) 			in_frames[i] = virt_to_mfn(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) 		MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) 		__set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) 		if (out_frames)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175) 			out_frames[i] = virt_to_pfn(vaddr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177) 	xen_mc_issue(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181)  * Update the pfn-to-mfn mappings for a virtual address range, either to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182)  * point to an array of mfns, or contiguously from a single starting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183)  * mfn.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185) static void xen_remap_exchanged_ptes(unsigned long vaddr, int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) 				     unsigned long *mfns,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) 				     unsigned long first_mfn)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) 	unsigned i, limit;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) 	unsigned long mfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) 	xen_mc_batch();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) 	limit = 1u << order;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) 	for (i = 0; i < limit; i++, vaddr += PAGE_SIZE) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) 		struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) 		unsigned flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) 		mcs = __xen_mc_entry(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) 		if (mfns)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201) 			mfn = mfns[i];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) 		else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) 			mfn = first_mfn + i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) 		if (i < (limit - 1))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) 			flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) 		else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) 			if (order == 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209) 				flags = UVMF_INVLPG | UVMF_ALL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) 			else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) 				flags = UVMF_TLB_FLUSH | UVMF_ALL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) 		MULTI_update_va_mapping(mcs.mc, vaddr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) 				mfn_pte(mfn, PAGE_KERNEL), flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) 		set_phys_to_machine(virt_to_pfn(vaddr), mfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) 	xen_mc_issue(0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) }
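/*
 * Only the last update in the batch above carries a TLB-flush request: a
 * single INVLPG for an order-0 region, a full flush otherwise, in both
 * cases on all vCPUs (UVMF_ALL).
 */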
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224)  * Perform the hypercall to exchange a region of our pfns to point to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225)  * memory with the required contiguous alignment.  Takes the pfns as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226)  * input, and populates mfns as output.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)  *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228)  * Returns a success code indicating whether the hypervisor was able to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229)  * satisfy the request or not.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230)  */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) static int xen_exchange_memory(unsigned long extents_in, unsigned int order_in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232) 			       unsigned long *pfns_in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) 			       unsigned long extents_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) 			       unsigned int order_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) 			       unsigned long *mfns_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) 			       unsigned int address_bits)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) 	long rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239) 	int success;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) 	struct xen_memory_exchange exchange = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) 		.in = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243) 			.nr_extents   = extents_in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) 			.extent_order = order_in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) 			.extent_start = pfns_in,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) 			.domid        = DOMID_SELF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) 		},
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) 		.out = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) 			.nr_extents   = extents_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250) 			.extent_order = order_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) 			.extent_start = mfns_out,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) 			.address_bits = address_bits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) 			.domid        = DOMID_SELF
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) 		}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) 	};
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257) 	BUG_ON(extents_in << order_in != extents_out << order_out);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) 	rc = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) 	success = (exchange.nr_exchanged == extents_in);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) 	BUG_ON(!success && ((exchange.nr_exchanged != 0) || (rc == 0)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) 	BUG_ON(success && (rc != 0));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) 	return success;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) }
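/*
 * The first BUG_ON above checks the invariant that both sides of the
 * exchange describe the same number of base frames
 * (extents_in << order_in == extents_out << order_out); success means the
 * hypervisor exchanged every input extent.
 */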
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) 				 unsigned int address_bits,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) 				 dma_addr_t *dma_handle)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) 	unsigned long *in_frames = discontig_frames, out_frame;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) 	unsigned long  flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) 	int            success;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) 	unsigned long vstart = (unsigned long)phys_to_virt(pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) 	 * Currently an auto-translated guest will not perform I/O, nor will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) 	 * it require PAE page directories below 4GB. Therefore any calls to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) 	 * this function are redundant and can be ignored.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) 	if (unlikely(order > MAX_CONTIG_ORDER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) 		return -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) 	memset((void *) vstart, 0, PAGE_SIZE << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) 	spin_lock_irqsave(&xen_reservation_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290) 	/* 1. Zap current PTEs, remembering MFNs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) 	xen_zap_pfn_range(vstart, order, in_frames, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293) 	/* 2. Get a new contiguous memory extent. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) 	out_frame = virt_to_pfn(vstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) 	success = xen_exchange_memory(1UL << order, 0, in_frames,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296) 				      1, order, &out_frame,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) 				      address_bits);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) 	/* 3. Map the new extent in place of old pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) 	if (success)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) 		xen_remap_exchanged_ptes(vstart, order, NULL, out_frame);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) 		xen_remap_exchanged_ptes(vstart, order, in_frames, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) 	spin_unlock_irqrestore(&xen_reservation_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) 	*dma_handle = virt_to_machine(vstart).maddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) 	return success ? 0 : -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) }
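/*
 * The three numbered steps above are how a PV guest obtains machine-
 * contiguous memory: drop the existing pfn->mfn links, ask Xen to swap the
 * scattered frames for one contiguous extent satisfying the address_bits
 * restriction, then map the result (or, on failure, the original frames)
 * back at the same virtual addresses.  The machine address of vstart is
 * reported via *dma_handle.
 */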
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313) 	unsigned long *out_frames = discontig_frames, in_frame;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) 	unsigned long  flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) 	int success;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) 	unsigned long vstart;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) 	if (unlikely(order > MAX_CONTIG_ORDER))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319) 		return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) 	vstart = (unsigned long)phys_to_virt(pstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322) 	memset((void *) vstart, 0, PAGE_SIZE << order);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) 	spin_lock_irqsave(&xen_reservation_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) 	/* 1. Find start MFN of contiguous extent. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) 	in_frame = virt_to_mfn(vstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329) 	/* 2. Zap current PTEs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) 	xen_zap_pfn_range(vstart, order, NULL, out_frames);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) 	/* 3. Do the exchange for non-contiguous MFNs. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333) 	success = xen_exchange_memory(1, order, &in_frame, 1UL << order,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) 					0, out_frames, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) 	/* 4. Map new pages in place of old pages. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) 	if (success)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338) 		xen_remap_exchanged_ptes(vstart, order, out_frames, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) 		xen_remap_exchanged_ptes(vstart, order, NULL, in_frame);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) 	spin_unlock_irqrestore(&xen_reservation_lock, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) 
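/*
 * Flush the TLB on every CPU by handing Xen a single
 * MMUEXT_TLB_FLUSH_ALL mmuext_op through the multicall batch.
 */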
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) static noinline void xen_flush_tlb_all(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) 	struct mmuext_op *op;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) 	struct multicall_space mcs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) 	preempt_disable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) 	mcs = xen_mc_entry(sizeof(*op));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) 	op = mcs.args;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) 	op->cmd = MMUEXT_TLB_FLUSH_ALL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) 	MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358) 	xen_mc_issue(PARAVIRT_LAZY_MMU);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) 	preempt_enable();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) 
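/*
 * Number of PTE updates queued per hypercall in xen_remap_pfn(); it
 * bounds the on-stack mmu_update array used there.
 */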
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) #define REMAP_BATCH_SIZE 16
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) 
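/*
 * State shared with remap_area_pfn_pte_fn() while walking the range:
 * @pfn points at the frame(s) being mapped (a single running value for
 * a contiguous mapping, an array entry otherwise), @prot is the page
 * protection to apply, @no_translate requests MMU_PT_UPDATE_NO_TRANSLATE
 * and @mmu_update is the next free slot in the caller's update batch.
 */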
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) struct remap_data {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) 	xen_pfn_t *pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) 	bool contiguous;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) 	bool no_translate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369) 	pgprot_t prot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) 	struct mmu_update *mmu_update;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) 
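/*
 * Called once per PTE by apply_to_page_range().  Rather than writing
 * the PTE directly, build the new (special) mfn PTE and queue it as a
 * struct mmu_update for a later HYPERVISOR_mmu_update() call.
 */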
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) static int remap_area_pfn_pte_fn(pte_t *ptep, unsigned long addr, void *data)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) 	struct remap_data *rmd = data;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376) 	pte_t pte = pte_mkspecial(mfn_pte(*rmd->pfn, rmd->prot));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379) 	 * If the range is contiguous, bump the single pfn value itself;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) 	 * otherwise advance the pointer to the next pfn in the array.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) 	if (rmd->contiguous)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) 		(*rmd->pfn)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) 		rmd->pfn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) 	rmd->mmu_update->ptr = virt_to_machine(ptep).maddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) 	rmd->mmu_update->ptr |= rmd->no_translate ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389) 		MMU_PT_UPDATE_NO_TRANSLATE :
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) 		MMU_NORMAL_PT_UPDATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) 	rmd->mmu_update->val = pte_val_ma(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) 	rmd->mmu_update++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) 	return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) 
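/*
 * xen_remap_pfn - remap part of a VMA onto the given machine frames.
 * @vma:          VMA to map into; must be marked VM_PFNMAP | VM_IO.
 * @addr:         starting virtual address within @vma.
 * @pfn:          frame(s) to map; a single starting frame when @err_ptr
 *                is NULL (contiguous), otherwise an array of @nr frames.
 * @nr:           number of pages to map.
 * @err_ptr:      optional per-page error array (may alias @pfn).
 * @prot:         page protection for the new mappings.
 * @domid:        domain owning the frames (e.g. DOMID_SELF).
 * @no_translate: queue updates as MMU_PT_UPDATE_NO_TRANSLATE.
 * @pages:        not used by this function.
 *
 * Returns the number of pages mapped, or a negative errno on failure.
 *
 * Illustrative call only (not taken from a real caller): mapping one of
 * our own frames into a driver VMA might look roughly like
 *
 *	ret = xen_remap_pfn(vma, vma->vm_start, &pfn, 1, NULL,
 *			    vma->vm_page_prot, DOMID_SELF, false, NULL);
 */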
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) int xen_remap_pfn(struct vm_area_struct *vma, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398) 		  xen_pfn_t *pfn, int nr, int *err_ptr, pgprot_t prot,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) 		  unsigned int domid, bool no_translate, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) 	int err = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) 	struct remap_data rmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403) 	struct mmu_update mmu_update[REMAP_BATCH_SIZE];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) 	unsigned long range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) 	int mapped = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) 	BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) 	rmd.pfn = pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410) 	rmd.prot = prot;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) 	/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) 	 * We use err_ptr to indicate whether we are doing a contiguous
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413) 	 * mapping or a discontiguous mapping.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) 	 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) 	rmd.contiguous = !err_ptr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) 	rmd.no_translate = no_translate;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) 
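	/*
	 * Map the range in chunks of at most REMAP_BATCH_SIZE pages:
	 * queue the PTE updates for one chunk via apply_to_page_range(),
	 * then push them to Xen, stepping past any frame the hypervisor
	 * rejects.
	 */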
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) 	while (nr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) 		int index = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420) 		int done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) 		int batch = min(REMAP_BATCH_SIZE, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) 		int batch_left = batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) 		range = (unsigned long)batch << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) 		rmd.mmu_update = mmu_update;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427) 		err = apply_to_page_range(vma->vm_mm, addr, range,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) 					  remap_area_pfn_pte_fn, &rmd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) 		if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) 			goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432) 		/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) 		 * Record the error for every page that fails, but keep
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) 		 * mapping until the whole set has been processed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) 		 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) 		do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437) 			int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) 			err = HYPERVISOR_mmu_update(&mmu_update[index],
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) 						    batch_left, &done, domid);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) 			/*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) 			 * @err_ptr may be the same buffer as @pfn, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444) 			 * only clear it after each chunk of @pfn is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) 			 * used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) 			 */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447) 			if (err_ptr) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) 				for (i = index; i < index + done; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449) 					err_ptr[i] = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) 			}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) 			if (err < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452) 				if (!err_ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) 					goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) 				err_ptr[i] = err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) 				done++; /* Skip failed frame. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456) 			} else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) 				mapped += done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) 			batch_left -= done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) 			index += done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) 		} while (batch_left);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) 		nr -= batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) 		addr += range;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) 		if (err_ptr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) 			err_ptr += batch;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466) 		cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) 	}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) 	xen_flush_tlb_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472) 	return err < 0 ? err : mapped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) EXPORT_SYMBOL_GPL(xen_remap_pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) 
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476) #ifdef CONFIG_KEXEC_CORE
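/*
 * For kexec/crash the vmcoreinfo note must be reported in an address
 * space the consumer can actually use: a PV domain hands out the
 * machine address, everyone else the ordinary physical address.
 */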
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) phys_addr_t paddr_vmcoreinfo_note(void)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) 	if (xen_pv_domain())
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480) 		return virt_to_machine(vmcoreinfo_note).maddr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) 	else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) 		return __pa(vmcoreinfo_note);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) #endif /* CONFIG_KEXEC_CORE */