Orange Pi 5 kernel

Deprecated Linux kernel 5.10.110 for Orange Pi 5/5B/5 Plus boards

git blame: every line below is from commit 8f3ce5b39 (kx, 2023-10-28 12:00:06 +0300)

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright 2013 Red Hat Inc.
 *
 * Authors: Jérôme Glisse <jglisse@redhat.com>
 */
/*
 * Refer to include/linux/hmm.h for information about heterogeneous memory
 * management or HMM for short.
 */
#include <linux/pagewalk.h>
#include <linux/hmm.h>
#include <linux/init.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mmzone.h>
#include <linux/pagemap.h>
#include <linux/swapops.h>
#include <linux/hugetlb.h>
#include <linux/memremap.h>
#include <linux/sched/mm.h>
#include <linux/jump_label.h>
#include <linux/dma-mapping.h>
#include <linux/mmu_notifier.h>
#include <linux/memory_hotplug.h>

struct hmm_vma_walk {
	struct hmm_range	*range;
	unsigned long		last;
};

enum {
	HMM_NEED_FAULT = 1 << 0,
	HMM_NEED_WRITE_FAULT = 1 << 1,
	HMM_NEED_ALL_BITS = HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT,
};

static int hmm_pfns_fill(unsigned long addr, unsigned long end,
			 struct hmm_range *range, unsigned long cpu_flags)
{
	unsigned long i = (addr - range->start) >> PAGE_SHIFT;

	for (; addr < end; addr += PAGE_SIZE, i++)
		range->hmm_pfns[i] = cpu_flags;
	return 0;
}

/*
 * hmm_vma_fault() - fault in a range lacking valid pmd or pte(s)
 * @addr: range virtual start address (inclusive)
 * @end: range virtual end address (exclusive)
 * @required_fault: HMM_NEED_* flags
 * @walk: mm_walk structure
 * Return: -EBUSY after page fault, or page fault error
 *
 * This function will be called whenever pmd_none() or pte_none() returns true,
 * or whenever there is no page directory covering the virtual address range.
 */
static int hmm_vma_fault(unsigned long addr, unsigned long end,
			 unsigned int required_fault, struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct vm_area_struct *vma = walk->vma;
	unsigned int fault_flags = FAULT_FLAG_REMOTE;

	WARN_ON_ONCE(!required_fault);
	hmm_vma_walk->last = addr;

	if (required_fault & HMM_NEED_WRITE_FAULT) {
		if (!(vma->vm_flags & VM_WRITE))
			return -EPERM;
		fault_flags |= FAULT_FLAG_WRITE;
	}

	for (; addr < end; addr += PAGE_SIZE)
		if (handle_mm_fault(vma, addr, fault_flags, NULL) &
		    VM_FAULT_ERROR)
			return -EFAULT;
	return -EBUSY;
}

static unsigned int hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
				       unsigned long pfn_req_flags,
				       unsigned long cpu_flags)
{
	struct hmm_range *range = hmm_vma_walk->range;

	/*
	 * We consider not only the individual per-page request but also the
	 * default flags requested for the range. The API can be used in two
	 * ways: either the HMM user coalesces multiple page faults into one
	 * request and sets flags per pfn for those faults, or the HMM user
	 * wants to pre-fault a range with specific flags. For the latter it
	 * would be a waste to have the user pre-fill the pfn array with a
	 * default flags value.
	 */
	pfn_req_flags &= range->pfn_flags_mask;
	pfn_req_flags |= range->default_flags;

	/* We aren't asked to do anything ... */
	if (!(pfn_req_flags & HMM_PFN_REQ_FAULT))
		return 0;

	/* Do we need a write fault? */
	if ((pfn_req_flags & HMM_PFN_REQ_WRITE) &&
	    !(cpu_flags & HMM_PFN_WRITE))
		return HMM_NEED_FAULT | HMM_NEED_WRITE_FAULT;

	/* If the CPU page table entry is not valid then we need to fault */
	if (!(cpu_flags & HMM_PFN_VALID))
		return HMM_NEED_FAULT;
	return 0;
}

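/*
 * Illustrative sketch, not part of the original file: the two ways a caller
 * can request faults, as described in the comment above.  The function and
 * the "faulting_index" parameter are hypothetical; only the struct hmm_range
 * fields and the HMM_PFN_REQ_* flags are real kernel API.
 */
static void example_request_modes(struct hmm_range *range,
				  unsigned long faulting_index)
{
	/* (a) pre-fault the whole range writable, ignoring per-pfn input bits */
	range->default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE;
	range->pfn_flags_mask = 0;

	/* (b) coalesce individual faults: pass per-pfn request bits through */
	range->default_flags = 0;
	range->pfn_flags_mask = ~0UL;
	range->hmm_pfns[faulting_index] = HMM_PFN_REQ_FAULT;
}
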
static unsigned int
hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
		     const unsigned long hmm_pfns[], unsigned long npages,
		     unsigned long cpu_flags)
{
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned int required_fault = 0;
	unsigned long i;

	/*
	 * If the default flags do not request to fault pages, and the mask does
	 * not allow for individual pages to be faulted, then
	 * hmm_pte_need_fault() will always return 0.
	 */
	if (!((range->default_flags | range->pfn_flags_mask) &
	      HMM_PFN_REQ_FAULT))
		return 0;

	for (i = 0; i < npages; ++i) {
		required_fault |= hmm_pte_need_fault(hmm_vma_walk, hmm_pfns[i],
						     cpu_flags);
		if (required_fault == HMM_NEED_ALL_BITS)
			return required_fault;
	}
	return required_fault;
}

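/*
 * Illustrative sketch, not part of the original file: with default_flags and
 * pfn_flags_mask both zero, the early return above guarantees that the walk
 * never faults, so hmm_range_fault() only snapshots the current CPU
 * page-table state into hmm_pfns[].  The function name is hypothetical.
 */
static void example_snapshot_only(struct hmm_range *range)
{
	range->default_flags = 0;
	range->pfn_flags_mask = 0;
	/* hmm_range_fault() will now report HMM_PFN_VALID/WRITE as-is */
}
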
static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
			     __always_unused int depth, struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned int required_fault;
	unsigned long i, npages;
	unsigned long *hmm_pfns;

	i = (addr - range->start) >> PAGE_SHIFT;
	npages = (end - addr) >> PAGE_SHIFT;
	hmm_pfns = &range->hmm_pfns[i];
	required_fault =
		hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0);
	if (!walk->vma) {
		if (required_fault)
			return -EFAULT;
		return hmm_pfns_fill(addr, end, range, HMM_PFN_ERROR);
	}
	if (required_fault)
		return hmm_vma_fault(addr, end, required_fault, walk);
	return hmm_pfns_fill(addr, end, range, 0);
}

static inline unsigned long hmm_pfn_flags_order(unsigned long order)
{
	return order << HMM_PFN_ORDER_SHIFT;
}

static inline unsigned long pmd_to_hmm_pfn_flags(struct hmm_range *range,
						 pmd_t pmd)
{
	if (pmd_protnone(pmd))
		return 0;
	return (pmd_write(pmd) ? (HMM_PFN_VALID | HMM_PFN_WRITE) :
				 HMM_PFN_VALID) |
	       hmm_pfn_flags_order(PMD_SHIFT - PAGE_SHIFT);
}

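/*
 * Illustrative sketch, not part of the original file: how a caller of
 * hmm_range_fault() can consume the order bits encoded above.  For a huge
 * PMD every covered hmm_pfns[] entry carries the same order value.  The
 * function name is hypothetical; hmm_pfn_to_page() and
 * hmm_pfn_to_map_order() come from include/linux/hmm.h.
 */
static void example_decode_order(struct hmm_range *range, unsigned long i)
{
	unsigned long hmm_pfn = range->hmm_pfns[i];

	if (hmm_pfn & HMM_PFN_VALID) {
		struct page *page = hmm_pfn_to_page(hmm_pfn);
		unsigned int order = hmm_pfn_to_map_order(hmm_pfn);

		/* order is PMD_SHIFT - PAGE_SHIFT when backed by a huge PMD */
		(void)page;
		(void)order;
	}
}
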
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
			      unsigned long end, unsigned long hmm_pfns[],
			      pmd_t pmd)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned long pfn, npages, i;
	unsigned int required_fault;
	unsigned long cpu_flags;

	npages = (end - addr) >> PAGE_SHIFT;
	cpu_flags = pmd_to_hmm_pfn_flags(range, pmd);
	required_fault =
		hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, cpu_flags);
	if (required_fault)
		return hmm_vma_fault(addr, end, required_fault, walk);

	pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
	for (i = 0; addr < end; addr += PAGE_SIZE, i++, pfn++)
		hmm_pfns[i] = pfn | cpu_flags;
	return 0;
}
#else /* CONFIG_TRANSPARENT_HUGEPAGE */
/* stub to allow the code below to compile */
int hmm_vma_handle_pmd(struct mm_walk *walk, unsigned long addr,
		unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */

static inline bool hmm_is_device_private_entry(struct hmm_range *range,
		swp_entry_t entry)
{
	return is_device_private_entry(entry) &&
		device_private_entry_to_page(entry)->pgmap->owner ==
		range->dev_private_owner;
}

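/*
 * Illustrative sketch, not part of the original file: a driver that owns a
 * MEMORY_DEVICE_PRIVATE pagemap has its own pages reported by the walk
 * (rather than treated as non-present) by passing the same owner cookie it
 * stored in pgmap->owner when the pagemap was registered.  The function and
 * "driver_cookie" are hypothetical.
 */
static void example_match_private_owner(struct hmm_range *range,
					struct dev_pagemap *pgmap,
					void *driver_cookie)
{
	pgmap->type = MEMORY_DEVICE_PRIVATE;	/* set before memremap_pages() */
	pgmap->owner = driver_cookie;

	range->dev_private_owner = driver_cookie;	/* set before hmm_range_fault() */
}
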
static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
						 pte_t pte)
{
	if (pte_none(pte) || !pte_present(pte) || pte_protnone(pte))
		return 0;
	return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
}

static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
			      unsigned long *hmm_pfn)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned int required_fault;
	unsigned long cpu_flags;
	pte_t pte = *ptep;
	uint64_t pfn_req_flags = *hmm_pfn;

	if (pte_none(pte)) {
		required_fault =
			hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
		if (required_fault)
			goto fault;
		*hmm_pfn = 0;
		return 0;
	}

	if (!pte_present(pte)) {
		swp_entry_t entry = pte_to_swp_entry(pte);

		/*
		 * Never fault in device private pages, but just report
		 * the PFN even if not present.
		 */
		if (hmm_is_device_private_entry(range, entry)) {
			cpu_flags = HMM_PFN_VALID;
			if (is_write_device_private_entry(entry))
				cpu_flags |= HMM_PFN_WRITE;
			*hmm_pfn = device_private_entry_to_pfn(entry) |
					cpu_flags;
			return 0;
		}

		required_fault =
			hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0);
		if (!required_fault) {
			*hmm_pfn = 0;
			return 0;
		}

		if (!non_swap_entry(entry))
			goto fault;

		if (is_migration_entry(entry)) {
			pte_unmap(ptep);
			hmm_vma_walk->last = addr;
			migration_entry_wait(walk->mm, pmdp, addr);
			return -EBUSY;
		}

		/* Report error for everything else */
		pte_unmap(ptep);
		return -EFAULT;
	}

	cpu_flags = pte_to_hmm_pfn_flags(range, pte);
	required_fault =
		hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
	if (required_fault)
		goto fault;

	/*
	 * Bypass devmap ptes, such as DAX pages, when all the requested pfn
	 * flags (pfn_req_flags) are fulfilled.
	 * Since each architecture defines a struct page for the zero page, just
	 * fall through and treat it like a normal page.
	 */
	if (!vm_normal_page(walk->vma, addr, pte) &&
	    !pte_devmap(pte) &&
	    !is_zero_pfn(pte_pfn(pte))) {
		if (hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0)) {
			pte_unmap(ptep);
			return -EFAULT;
		}
		*hmm_pfn = HMM_PFN_ERROR;
		return 0;
	}

	*hmm_pfn = pte_pfn(pte) | cpu_flags;
	return 0;

fault:
	pte_unmap(ptep);
	/* Fault any virtual address we were asked to fault */
	return hmm_vma_fault(addr, end, required_fault, walk);
}

static int hmm_vma_walk_pmd(pmd_t *pmdp,
			    unsigned long start,
			    unsigned long end,
			    struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned long *hmm_pfns =
		&range->hmm_pfns[(start - range->start) >> PAGE_SHIFT];
	unsigned long npages = (end - start) >> PAGE_SHIFT;
	unsigned long addr = start;
	pte_t *ptep;
	pmd_t pmd;

again:
	pmd = READ_ONCE(*pmdp);
	if (pmd_none(pmd))
		return hmm_vma_walk_hole(start, end, -1, walk);

	if (thp_migration_supported() && is_pmd_migration_entry(pmd)) {
		if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) {
			hmm_vma_walk->last = addr;
			pmd_migration_entry_wait(walk->mm, pmdp);
			return -EBUSY;
		}
		return hmm_pfns_fill(start, end, range, 0);
	}

	if (!pmd_present(pmd)) {
		if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
			return -EFAULT;
		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
	}

	if (pmd_devmap(pmd) || pmd_trans_huge(pmd)) {
		/*
		 * No need to take the pmd_lock here: even if some other thread
		 * is splitting the huge pmd, we will get that event through the
		 * mmu_notifier callback.
		 *
		 * So just re-read the pmd value, check again that it is a
		 * transparent huge or device mapping, and compute the
		 * corresponding pfn values.
		 */
		pmd = pmd_read_atomic(pmdp);
		barrier();
		if (!pmd_devmap(pmd) && !pmd_trans_huge(pmd))
			goto again;

		return hmm_vma_handle_pmd(walk, addr, end, hmm_pfns, pmd);
	}

	/*
	 * We have handled all the valid cases above, i.e. either none,
	 * migration, huge or transparent huge. At this point the pmd is either
	 * a valid entry pointing to a pte directory or a bad pmd that will not
	 * recover.
	 */
	if (pmd_bad(pmd)) {
		if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0))
			return -EFAULT;
		return hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);
	}

	ptep = pte_offset_map(pmdp, addr);
	for (; addr < end; addr += PAGE_SIZE, ptep++, hmm_pfns++) {
		int r;

		r = hmm_vma_handle_pte(walk, addr, end, pmdp, ptep, hmm_pfns);
		if (r) {
			/* hmm_vma_handle_pte() did pte_unmap() */
			return r;
		}
	}
	pte_unmap(ptep - 1);
	return 0;
}

#if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && \
    defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
static inline unsigned long pud_to_hmm_pfn_flags(struct hmm_range *range,
						 pud_t pud)
{
	if (!pud_present(pud))
		return 0;
	return (pud_write(pud) ? (HMM_PFN_VALID | HMM_PFN_WRITE) :
				 HMM_PFN_VALID) |
	       hmm_pfn_flags_order(PUD_SHIFT - PAGE_SHIFT);
}

static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end,
		struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	unsigned long addr = start;
	pud_t pud;
	int ret = 0;
	spinlock_t *ptl = pud_trans_huge_lock(pudp, walk->vma);

	if (!ptl)
		return 0;

	/* Normally we don't want to split the huge page */
	walk->action = ACTION_CONTINUE;

	pud = READ_ONCE(*pudp);
	if (pud_none(pud)) {
		spin_unlock(ptl);
		return hmm_vma_walk_hole(start, end, -1, walk);
	}

	if (pud_huge(pud) && pud_devmap(pud)) {
		unsigned long i, npages, pfn;
		unsigned int required_fault;
		unsigned long *hmm_pfns;
		unsigned long cpu_flags;

		if (!pud_present(pud)) {
			spin_unlock(ptl);
			return hmm_vma_walk_hole(start, end, -1, walk);
		}

		i = (addr - range->start) >> PAGE_SHIFT;
		npages = (end - addr) >> PAGE_SHIFT;
		hmm_pfns = &range->hmm_pfns[i];

		cpu_flags = pud_to_hmm_pfn_flags(range, pud);
		required_fault = hmm_range_need_fault(hmm_vma_walk, hmm_pfns,
						      npages, cpu_flags);
		if (required_fault) {
			spin_unlock(ptl);
			return hmm_vma_fault(addr, end, required_fault, walk);
		}

		pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
		for (i = 0; i < npages; ++i, ++pfn)
			hmm_pfns[i] = pfn | cpu_flags;
		goto out_unlock;
	}

	/* Ask for the PUD to be split */
	walk->action = ACTION_SUBTREE;

out_unlock:
	spin_unlock(ptl);
	return ret;
}
#else
#define hmm_vma_walk_pud	NULL
#endif

#ifdef CONFIG_HUGETLB_PAGE
static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
				      unsigned long start, unsigned long end,
				      struct mm_walk *walk)
{
	unsigned long addr = start, i, pfn;
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	struct vm_area_struct *vma = walk->vma;
	unsigned int required_fault;
	unsigned long pfn_req_flags;
	unsigned long cpu_flags;
	spinlock_t *ptl;
	pte_t entry;

	ptl = huge_pte_lock(hstate_vma(vma), walk->mm, pte);
	entry = huge_ptep_get(pte);

	i = (start - range->start) >> PAGE_SHIFT;
	pfn_req_flags = range->hmm_pfns[i];
	cpu_flags = pte_to_hmm_pfn_flags(range, entry) |
		    hmm_pfn_flags_order(huge_page_order(hstate_vma(vma)));
	required_fault =
		hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, cpu_flags);
	if (required_fault) {
		spin_unlock(ptl);
		return hmm_vma_fault(addr, end, required_fault, walk);
	}

	pfn = pte_pfn(entry) + ((start & ~hmask) >> PAGE_SHIFT);
	for (; addr < end; addr += PAGE_SIZE, i++, pfn++)
		range->hmm_pfns[i] = pfn | cpu_flags;

	spin_unlock(ptl);
	return 0;
}
#else
#define hmm_vma_walk_hugetlb_entry NULL
#endif /* CONFIG_HUGETLB_PAGE */

static int hmm_vma_walk_test(unsigned long start, unsigned long end,
			     struct mm_walk *walk)
{
	struct hmm_vma_walk *hmm_vma_walk = walk->private;
	struct hmm_range *range = hmm_vma_walk->range;
	struct vm_area_struct *vma = walk->vma;

	if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)) &&
	    vma->vm_flags & VM_READ)
		return 0;

	/*
	 * vma ranges that don't have struct page backing them or map I/O
	 * devices directly cannot be handled by hmm_range_fault().
	 *
	 * If the vma does not allow read access, then assume that it does not
	 * allow write access either. HMM does not support architectures that
	 * allow write without read.
	 *
	 * If a fault is requested for an unsupported range then it is a hard
	 * failure.
	 */
	if (hmm_range_need_fault(hmm_vma_walk,
				 range->hmm_pfns +
					 ((start - range->start) >> PAGE_SHIFT),
				 (end - start) >> PAGE_SHIFT, 0))
		return -EFAULT;

	hmm_pfns_fill(start, end, range, HMM_PFN_ERROR);

	/* Skip this vma and continue processing the next vma. */
	return 1;
}

static const struct mm_walk_ops hmm_walk_ops = {
	.pud_entry	= hmm_vma_walk_pud,
	.pmd_entry	= hmm_vma_walk_pmd,
	.pte_hole	= hmm_vma_walk_hole,
	.hugetlb_entry	= hmm_vma_walk_hugetlb_entry,
	.test_walk	= hmm_vma_walk_test,
};

/**
 * hmm_range_fault - try to fault some address in a virtual address range
 * @range:	argument structure
 *
 * Returns 0 on success or one of the following error codes:
 *
 * -EINVAL:	Invalid arguments or mm or virtual address is in an invalid vma
 *		(e.g., device file vma).
 * -ENOMEM:	Out of memory.
 * -EPERM:	Invalid permission (e.g., asking for write and range is read
 *		only).
 * -EBUSY:	The range has been invalidated and the caller needs to wait for
 *		the invalidation to finish.
 * -EFAULT:	A page was requested to be valid and could not be made valid,
 *		i.e. it has no backing VMA or it is illegal to access it.
 *
 * This is similar to get_user_pages(), except that it can read the page tables
 * without mutating them (i.e. causing faults).
 */
int hmm_range_fault(struct hmm_range *range)
{
	struct hmm_vma_walk hmm_vma_walk = {
		.range = range,
		.last = range->start,
	};
	struct mm_struct *mm = range->notifier->mm;
	int ret;

	mmap_assert_locked(mm);

	do {
		/* If the range is no longer valid, force a retry. */
		if (mmu_interval_check_retry(range->notifier,
					     range->notifier_seq))
			return -EBUSY;
		ret = walk_page_range(mm, hmm_vma_walk.last, range->end,
				      &hmm_walk_ops, &hmm_vma_walk);
		/*
		 * When -EBUSY is returned the loop restarts with
		 * hmm_vma_walk.last set to an address that has not been stored
		 * in pfns. All entries < last in the pfn array are set to their
		 * output, and all >= are still at their input values.
		 */
	} while (ret == -EBUSY);
	return ret;
}
EXPORT_SYMBOL(hmm_range_fault);
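
/*
 * Illustrative sketch, not part of the original file: the caller pattern the
 * kerneldoc above assumes (see also Documentation/vm/hmm.rst in this tree).
 * "driver_lock" and the pfns storage are hypothetical; hmm_range_fault(),
 * mmap_read_lock()/unlock() and the mmu_interval_* calls are real kernel API.
 */
static int example_fault_and_use(struct mmu_interval_notifier *notifier,
				 struct mutex *driver_lock,
				 unsigned long start, unsigned long end,
				 unsigned long *pfns)
{
	struct mm_struct *mm = notifier->mm;
	struct hmm_range range = {
		.notifier = notifier,
		.start = start,
		.end = end,
		.hmm_pfns = pfns,
		.default_flags = HMM_PFN_REQ_FAULT,
	};
	int ret;

again:
	range.notifier_seq = mmu_interval_read_begin(notifier);
	mmap_read_lock(mm);
	ret = hmm_range_fault(&range);
	mmap_read_unlock(mm);
	if (ret) {
		if (ret == -EBUSY)
			goto again;	/* the range was invalidated, retry */
		return ret;
	}

	mutex_lock(driver_lock);
	if (mmu_interval_read_retry(notifier, range.notifier_seq)) {
		/* a concurrent invalidation raced with the fault, start over */
		mutex_unlock(driver_lock);
		goto again;
	}

	/* range.hmm_pfns[] is stable while driver_lock is held */
	mutex_unlock(driver_lock);
	return 0;
}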