// SPDX-License-Identifier: GPL-2.0-only
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/spinlock.h>

#include <linux/mm.h>
#include <linux/memremap.h>
#include <linux/pagemap.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>

#include <linux/sched/signal.h>
#include <linux/rwsem.h>
#include <linux/hugetlb.h>
#include <linux/migrate.h>
#include <linux/mm_inline.h>
#include <linux/sched/mm.h>

#include <linux/page_pinner.h>

#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

#include "internal.h"

struct follow_page_context {
	struct dev_pagemap *pgmap;
	unsigned int page_mask;
};

static void hpage_pincount_add(struct page *page, int refs)
{
	VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
	VM_BUG_ON_PAGE(page != compound_head(page), page);

	atomic_add(refs, compound_pincount_ptr(page));
}

static void hpage_pincount_sub(struct page *page, int refs)
{
	VM_BUG_ON_PAGE(!hpage_pincount_available(page), page);
	VM_BUG_ON_PAGE(page != compound_head(page), page);

	atomic_sub(refs, compound_pincount_ptr(page));
}

/* Equivalent to calling put_page() @refs times. */
static void put_page_refs(struct page *page, int refs)
{
#ifdef CONFIG_DEBUG_VM
	if (VM_WARN_ON_ONCE_PAGE(page_ref_count(page) < refs, page))
		return;
#endif

	/*
	 * Calling put_page() for each ref is unnecessarily slow. Only the last
	 * ref needs a put_page().
	 */
	if (refs > 1)
		page_ref_sub(page, refs - 1);
	put_page(page);
}

/*
 * Return the compound head page with ref appropriately incremented,
 * or NULL if that failed.
 */
static inline struct page *try_get_compound_head(struct page *page, int refs)
{
	struct page *head = compound_head(page);

	if (WARN_ON_ONCE(page_ref_count(head) < 0))
		return NULL;
	if (unlikely(!page_cache_add_speculative(head, refs)))
		return NULL;

	/*
	 * At this point we have a stable reference to the head page; but it
	 * could be that between the compound_head() lookup and the refcount
	 * increment, the compound page was split, in which case we'd end up
	 * holding a reference on a page that has nothing to do with the page
	 * we were given anymore.
	 * So now that the head page is stable, recheck that the pages still
	 * belong together.
	 */
	if (unlikely(compound_head(page) != head)) {
		put_page_refs(head, refs);
		return NULL;
	}

	return head;
}

/*
 * try_grab_compound_head() - attempt to elevate a page's refcount, by a
 * flags-dependent amount.
 *
 * "grab" names in this file mean, "look at flags to decide whether to use
 * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount."
 *
 * Either FOLL_PIN or FOLL_GET (or neither) must be set, but not both at the
 * same time. (That's true throughout the get_user_pages*() and
 * pin_user_pages*() APIs.) Cases:
 *
 * FOLL_GET: page's refcount will be incremented by 1.
 * FOLL_PIN: page's refcount will be incremented by GUP_PIN_COUNTING_BIAS.
 *
 * Return: head page (with refcount appropriately incremented) for success, or
 * NULL upon failure. If neither FOLL_GET nor FOLL_PIN was set, that's
 * considered failure, and furthermore, a likely bug in the caller, so a warning
 * is also emitted.
 */
static __maybe_unused struct page *try_grab_compound_head(struct page *page,
							   int refs,
							   unsigned int flags)
{
	if (flags & FOLL_GET) {
		struct page *head = try_get_compound_head(page, refs);
		if (head)
			set_page_pinner(head, compound_order(head));
		return head;
	} else if (flags & FOLL_PIN) {
		int orig_refs = refs;

		/*
		 * Can't do FOLL_LONGTERM + FOLL_PIN with CMA in the gup fast
		 * path, so fail and let the caller fall back to the slow path.
		 */
		if (unlikely(flags & FOLL_LONGTERM) &&
		    is_migrate_cma_page(page))
			return NULL;

		/*
		 * CAUTION: Don't use compound_head() on the page before this
		 * point, the result won't be stable.
		 */
		page = try_get_compound_head(page, refs);
		if (!page)
			return NULL;

		/*
		 * When pinning a compound page of order > 1 (which is what
		 * hpage_pincount_available() checks for), use an exact count to
		 * track it, via hpage_pincount_add/_sub().
		 *
		 * However, be sure to *also* increment the normal page refcount
		 * field at least once, so that the page really is pinned.
		 */
		if (hpage_pincount_available(page))
			hpage_pincount_add(page, refs);
		else
			page_ref_add(page, refs * (GUP_PIN_COUNTING_BIAS - 1));

		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED,
				    orig_refs);

		return page;
	}

	WARN_ON_ONCE(1);
	return NULL;
}

static void put_compound_head(struct page *page, int refs, unsigned int flags)
{
	if (flags & FOLL_PIN) {
		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_RELEASED,
				    refs);

		if (hpage_pincount_available(page))
			hpage_pincount_sub(page, refs);
		else
			refs *= GUP_PIN_COUNTING_BIAS;
	}

	if (flags & FOLL_GET)
		reset_page_pinner(page, compound_order(page));
	put_page_refs(page, refs);
}
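
/*
 * Illustrative sketch, not part of the original file: how a gup-fast style
 * caller pairs try_grab_compound_head() with put_compound_head() when a
 * later revalidation fails. The @still_mapped flag is a hypothetical
 * stand-in for the pte recheck that real callers perform after grabbing.
 */
static __maybe_unused int demo_grab_then_undo(struct page *page, int refs,
					      unsigned int flags,
					      bool still_mapped)
{
	struct page *head = try_grab_compound_head(page, refs, flags);

	if (!head)
		return -ENOMEM;	/* caller would fall back to the slow path */

	if (!still_mapped) {
		/* Undo exactly what the grab did, honouring @flags. */
		put_compound_head(head, refs, flags);
		return -EAGAIN;
	}

	return 0;
}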

/**
 * try_grab_page() - elevate a page's refcount by a flag-dependent amount
 *
 * This might not do anything at all, depending on the flags argument.
 *
 * "grab" names in this file mean, "look at flags to decide whether to use
 * FOLL_PIN or FOLL_GET behavior, when incrementing the page's refcount."
 *
 * @page: pointer to page to be grabbed
 * @flags: gup flags: these are the FOLL_* flag values.
 *
 * Either FOLL_PIN or FOLL_GET (or neither) may be set, but not both at the same
 * time. Cases:
 *
 * FOLL_GET: page's refcount will be incremented by 1.
 * FOLL_PIN: page's refcount will be incremented by GUP_PIN_COUNTING_BIAS.
 *
 * Return: true for success, or if no action was required (if neither FOLL_PIN
 * nor FOLL_GET was set, nothing is done). False for failure: FOLL_GET or
 * FOLL_PIN was set, but the page could not be grabbed.
 */
bool __must_check try_grab_page(struct page *page, unsigned int flags)
{
	WARN_ON_ONCE((flags & (FOLL_GET | FOLL_PIN)) == (FOLL_GET | FOLL_PIN));

	if (flags & FOLL_GET) {
		bool ret = try_get_page(page);

		if (ret) {
			page = compound_head(page);
			set_page_pinner(page, compound_order(page));
		}
		return ret;
	} else if (flags & FOLL_PIN) {
		int refs = 1;

		page = compound_head(page);

		if (WARN_ON_ONCE(page_ref_count(page) <= 0))
			return false;

		if (hpage_pincount_available(page))
			hpage_pincount_add(page, 1);
		else
			refs = GUP_PIN_COUNTING_BIAS;

		/*
		 * Similar to try_grab_compound_head(): even if using the
		 * hpage_pincount_add/_sub() routines, be sure to
		 * *also* increment the normal page refcount field at least
		 * once, so that the page really is pinned.
		 */
		page_ref_add(page, refs);

		mod_node_page_state(page_pgdat(page), NR_FOLL_PIN_ACQUIRED, 1);
	}

	return true;
}

/**
 * unpin_user_page() - release a dma-pinned page
 * @page: pointer to page to be released
 *
 * Pages that were pinned via pin_user_pages*() must be released via either
 * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so
 * that such pages can be separately tracked and uniquely handled. In
 * particular, interactions with RDMA and filesystems need special handling.
 */
void unpin_user_page(struct page *page)
{
	put_compound_head(compound_head(page), 1, FOLL_PIN);
}
EXPORT_SYMBOL(unpin_user_page);
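
/*
 * Illustrative sketch, not part of the original file: the pairing rule from
 * the comment above. A page obtained from pin_user_pages*() is released with
 * unpin_user_page(), never with put_page(). The user address is hypothetical
 * and assumed valid.
 */
static __maybe_unused void demo_pin_then_unpin(unsigned long uaddr)
{
	struct page *page;
	int pinned;

	pinned = pin_user_pages_fast(uaddr, 1, FOLL_WRITE, &page);
	if (pinned != 1)
		return;

	/* ... DMA to/from the page, or direct access, goes here ... */

	unpin_user_page(page);
}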

/*
 * put_user_page() - release a page obtained using get_user_pages() or
 * follow_page(FOLL_GET)
 * @page: pointer to page to be released
 *
 * Pages that were obtained via get_user_pages()/follow_page(FOLL_GET) must be
 * released via put_user_page.
 * Note: if it's not a page from GUP or follow_page(FOLL_GET), it's harmless.
 */
void put_user_page(struct page *page)
{
	struct page *head = compound_head(page);

	reset_page_pinner(head, compound_order(head));
	put_page(page);
}
EXPORT_SYMBOL(put_user_page);
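
/*
 * Illustrative sketch, not part of the original file: releasing a reference
 * taken with try_grab_page() using the helper that matches the gup flags.
 * FOLL_PIN pairs with unpin_user_page(); in this tree FOLL_GET pairs with
 * put_user_page() so that the page_pinner record is cleared as well.
 */
static __maybe_unused int demo_grab_and_release(struct page *page,
						unsigned int flags)
{
	if (!try_grab_page(page, flags))
		return -ENOMEM;

	/* ... access the page while the reference or pin is held ... */

	if (flags & FOLL_PIN)
		unpin_user_page(page);
	else if (flags & FOLL_GET)
		put_user_page(page);

	return 0;
}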

/**
 * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages
 * @pages: array of pages to be maybe marked dirty, and definitely released.
 * @npages: number of pages in the @pages array.
 * @make_dirty: whether to mark the pages dirty
 *
 * "gup-pinned page" refers to a page that has had one of the get_user_pages()
 * variants called on that page.
 *
 * For each page in the @pages array, make that page (or its head page, if a
 * compound page) dirty, if @make_dirty is true, and if the page was previously
 * listed as clean. In any case, releases all pages using unpin_user_page(),
 * possibly via unpin_user_pages(), for the non-dirty case.
 *
 * Please see the unpin_user_page() documentation for details.
 *
 * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is
 * required, then the caller should a) verify that this is really correct,
 * because _lock() is usually required, and b) hand code it:
 * set_page_dirty_lock(), unpin_user_page().
 *
 */
void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
				 bool make_dirty)
{
	unsigned long index;

	/*
	 * TODO: this can be optimized for huge pages: if a series of pages is
	 * physically contiguous and part of the same compound page, then a
	 * single operation to the head page should suffice.
	 */

	if (!make_dirty) {
		unpin_user_pages(pages, npages);
		return;
	}

	for (index = 0; index < npages; index++) {
		struct page *page = compound_head(pages[index]);
		/*
		 * Checking PageDirty at this point may race with
		 * clear_page_dirty_for_io(), but that's OK. Two key
		 * cases:
		 *
		 * 1) This code sees the page as already dirty, so it
		 * skips the call to set_page_dirty(). That could happen
		 * because clear_page_dirty_for_io() called
		 * page_mkclean(), followed by set_page_dirty().
		 * However, now the page is going to get written back,
		 * which meets the original intention of setting it
		 * dirty, so all is well: clear_page_dirty_for_io() goes
		 * on to call TestClearPageDirty(), and write the page
		 * back.
		 *
		 * 2) This code sees the page as clean, so it calls
		 * set_page_dirty(). The page stays dirty, despite being
		 * written back, so it gets written back again in the
		 * next writeback cycle. This is harmless.
		 */
		if (!PageDirty(page))
			set_page_dirty_lock(page);
		unpin_user_page(page);
	}
}
EXPORT_SYMBOL(unpin_user_pages_dirty_lock);
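
/*
 * Illustrative sketch, not part of the original file: a typical pin-for-DMA
 * sequence ending in unpin_user_pages_dirty_lock(). The device wrote into
 * the pages, so they are dirtied on release. @uaddr, @pages and @nr_pages
 * are hypothetical caller-provided values.
 */
static __maybe_unused void demo_pin_dma_unpin_dirty(unsigned long uaddr,
						    struct page **pages,
						    int nr_pages)
{
	int pinned;

	pinned = pin_user_pages_fast(uaddr, nr_pages,
				     FOLL_WRITE | FOLL_LONGTERM, pages);
	if (pinned <= 0)
		return;

	/* ... program the device and wait for the DMA to complete ... */

	unpin_user_pages_dirty_lock(pages, pinned, true);
}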

/**
 * unpin_user_pages() - release an array of gup-pinned pages.
 * @pages: array of pages to be released.
 * @npages: number of pages in the @pages array.
 *
 * For each page in the @pages array, release the page using unpin_user_page().
 *
 * Please see the unpin_user_page() documentation for details.
 */
void unpin_user_pages(struct page **pages, unsigned long npages)
{
	unsigned long index;

	/*
	 * If this WARN_ON() fires, then the system *might* be leaking pages (by
	 * leaving them pinned), but probably not. More likely, gup/pup returned
	 * a hard -ERRNO error to the caller, who erroneously passed it here.
	 */
	if (WARN_ON(IS_ERR_VALUE(npages)))
		return;
	/*
	 * TODO: this can be optimized for huge pages: if a series of pages is
	 * physically contiguous and part of the same compound page, then a
	 * single operation to the head page should suffice.
	 */
	for (index = 0; index < npages; index++)
		unpin_user_page(pages[index]);
}
EXPORT_SYMBOL(unpin_user_pages);
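
/*
 * Illustrative sketch, not part of the original file: the error handling
 * pattern that the WARN_ON() above guards against. A negative gup/pup return
 * value is an error code, not a page count, and must not reach
 * unpin_user_pages().
 */
static __maybe_unused void demo_unpin_only_on_success(unsigned long uaddr,
						      struct page **pages,
						      int nr_pages)
{
	int pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);

	if (pinned < 0)
		return;		/* hard error: nothing to unpin */

	/* ... use pages[0] .. pages[pinned - 1] ... */

	unpin_user_pages(pages, pinned);
}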

#ifdef CONFIG_MMU
static struct page *no_page_table(struct vm_area_struct *vma,
		unsigned int flags)
{
	/*
	 * When core dumping an enormous anonymous area that nobody
	 * has touched so far, we don't want to allocate unnecessary pages or
	 * page tables. Return error instead of NULL to skip handle_mm_fault,
	 * then get_dump_page() will return NULL to leave a hole in the dump.
	 * But we can only make this optimization where a hole would surely
	 * be zero-filled if handle_mm_fault() actually did handle it.
	 */
	if ((flags & FOLL_DUMP) &&
			(vma_is_anonymous(vma) || !vma->vm_ops->fault))
		return ERR_PTR(-EFAULT);
	return NULL;
}

static int follow_pfn_pte(struct vm_area_struct *vma, unsigned long address,
		pte_t *pte, unsigned int flags)
{
	/* No page to get reference */
	if (flags & FOLL_GET)
		return -EFAULT;

	if (flags & FOLL_TOUCH) {
		pte_t entry = *pte;

		if (flags & FOLL_WRITE)
			entry = pte_mkdirty(entry);
		entry = pte_mkyoung(entry);

		if (!pte_same(*pte, entry)) {
			set_pte_at(vma->vm_mm, address, pte, entry);
			update_mmu_cache(vma, address, pte);
		}
	}

	/* Proper page table entry exists, but no corresponding struct page */
	return -EEXIST;
}

/*
 * FOLL_FORCE can write to even unwritable pte's, but only
 * after we've gone through a COW cycle and they are dirty.
 */
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
	return pte_write(pte) ||
		((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}

static struct page *follow_page_pte(struct vm_area_struct *vma,
		unsigned long address, pmd_t *pmd, unsigned int flags,
		struct dev_pagemap **pgmap)
{
	struct mm_struct *mm = vma->vm_mm;
	struct page *page;
	spinlock_t *ptl;
	pte_t *ptep, pte;
	int ret;

	/* FOLL_GET and FOLL_PIN are mutually exclusive. */
	if (WARN_ON_ONCE((flags & (FOLL_PIN | FOLL_GET)) ==
			 (FOLL_PIN | FOLL_GET)))
		return ERR_PTR(-EINVAL);
retry:
	if (unlikely(pmd_bad(*pmd)))
		return no_page_table(vma, flags);

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
	pte = *ptep;
	if (!pte_present(pte)) {
		swp_entry_t entry;
		/*
		 * KSM's break_ksm() relies upon recognizing a ksm page
		 * even while it is being migrated, so for that case we
		 * need migration_entry_wait().
		 */
		if (likely(!(flags & FOLL_MIGRATION)))
			goto no_page;
		if (pte_none(pte))
			goto no_page;
		entry = pte_to_swp_entry(pte);
		if (!is_migration_entry(entry))
			goto no_page;
		pte_unmap_unlock(ptep, ptl);
		migration_entry_wait(mm, pmd, address);
		goto retry;
	}
	if ((flags & FOLL_NUMA) && pte_protnone(pte))
		goto no_page;
	if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
		pte_unmap_unlock(ptep, ptl);
		return NULL;
	}

	page = vm_normal_page(vma, address, pte);
	if (!page && pte_devmap(pte) && (flags & (FOLL_GET | FOLL_PIN))) {
		/*
		 * Only return device mapping pages in the FOLL_GET or FOLL_PIN
		 * case since they are only valid while holding the pgmap
		 * reference.
		 */
		*pgmap = get_dev_pagemap(pte_pfn(pte), *pgmap);
		if (*pgmap)
			page = pte_page(pte);
		else
			goto no_page;
	} else if (unlikely(!page)) {
		if (flags & FOLL_DUMP) {
			/* Avoid special (like zero) pages in core dumps */
			page = ERR_PTR(-EFAULT);
			goto out;
		}

		if (is_zero_pfn(pte_pfn(pte))) {
			page = pte_page(pte);
		} else {
			ret = follow_pfn_pte(vma, address, ptep, flags);
			page = ERR_PTR(ret);
			goto out;
		}
	}

	if (flags & FOLL_SPLIT && PageTransCompound(page)) {
		get_page(page);
		pte_unmap_unlock(ptep, ptl);
		lock_page(page);
		ret = split_huge_page(page);
		unlock_page(page);
		put_page(page);
		if (ret)
			return ERR_PTR(ret);
		goto retry;
	}

	/* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */
	if (unlikely(!try_grab_page(page, flags))) {
		page = ERR_PTR(-ENOMEM);
		goto out;
	}
	/*
	 * We need to make the page accessible if and only if we are going
	 * to access its content (the FOLL_PIN case). Please see
	 * Documentation/core-api/pin_user_pages.rst for details.
	 */
	if (flags & FOLL_PIN) {
		ret = arch_make_page_accessible(page);
		if (ret) {
			unpin_user_page(page);
			page = ERR_PTR(ret);
			goto out;
		}
	}
	if (flags & FOLL_TOUCH) {
		if ((flags & FOLL_WRITE) &&
		    !pte_dirty(pte) && !PageDirty(page))
			set_page_dirty(page);
		/*
		 * pte_mkyoung() would be more correct here, but atomic care
		 * is needed to avoid losing the dirty bit: it is easier to use
		 * mark_page_accessed().
		 */
		mark_page_accessed(page);
	}
	if ((flags & FOLL_MLOCK) && (vma->vm_flags & VM_LOCKED)) {
		/* Do not mlock pte-mapped THP */
		if (PageTransCompound(page))
			goto out;

		/*
		 * The preliminary mapping check is mainly to avoid the
		 * pointless overhead of lock_page on the ZERO_PAGE
		 * which might bounce very badly if there is contention.
		 *
		 * If the page is already locked, we don't need to
		 * handle it now - vmscan will handle it later if and
		 * when it attempts to reclaim the page.
		 */
		if (page->mapping && trylock_page(page)) {
			lru_add_drain();	/* push cached pages to LRU */
			/*
			 * Because we lock page here, and migration is
			 * blocked by the pte's page reference, and we
			 * know the page is still mapped, we don't even
			 * need to check for file-cache page truncation.
			 */
			mlock_vma_page(page);
			unlock_page(page);
		}
	}
out:
	pte_unmap_unlock(ptep, ptl);
	return page;
no_page:
	pte_unmap_unlock(ptep, ptl);
	if (!pte_none(pte))
		return NULL;
	return no_page_table(vma, flags);
}

static struct page *follow_pmd_mask(struct vm_area_struct *vma,
				    unsigned long address, pud_t *pudp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	pmd_t *pmd, pmdval;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pmd = pmd_offset(pudp, address);
	/*
	 * The READ_ONCE() will stabilize the pmdval in a register or
	 * on the stack so that it will stop changing under the code.
	 */
	pmdval = READ_ONCE(*pmd);
	if (pmd_none(pmdval))
		return no_page_table(vma, flags);
	if (pmd_huge(pmdval) && is_vm_hugetlb_page(vma)) {
		page = follow_huge_pmd(mm, address, pmd, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pmd_val(pmdval)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pmd_val(pmdval)), flags,
				      PMD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
retry:
	if (!pmd_present(pmdval)) {
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		VM_BUG_ON(thp_migration_supported() &&
			  !is_pmd_migration_entry(pmdval));
		if (is_pmd_migration_entry(pmdval))
			pmd_migration_entry_wait(mm, pmd);
		pmdval = READ_ONCE(*pmd);
		/*
		 * MADV_DONTNEED may convert the pmd to null because
		 * mmap_lock is held in read mode
		 */
		if (pmd_none(pmdval))
			return no_page_table(vma, flags);
		goto retry;
	}
	if (pmd_devmap(pmdval)) {
		ptl = pmd_lock(mm, pmd);
		page = follow_devmap_pmd(vma, address, pmd, flags, &ctx->pgmap);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (likely(!pmd_trans_huge(pmdval)))
		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);

	if ((flags & FOLL_NUMA) && pmd_protnone(pmdval))
		return no_page_table(vma, flags);

retry_locked:
	ptl = pmd_lock(mm, pmd);
	if (unlikely(pmd_none(*pmd))) {
		spin_unlock(ptl);
		return no_page_table(vma, flags);
	}
	if (unlikely(!pmd_present(*pmd))) {
		spin_unlock(ptl);
		if (likely(!(flags & FOLL_MIGRATION)))
			return no_page_table(vma, flags);
		pmd_migration_entry_wait(mm, pmd);
		goto retry_locked;
	}
	if (unlikely(!pmd_trans_huge(*pmd))) {
		spin_unlock(ptl);
		return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
	}
	if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) {
		int ret;
		page = pmd_page(*pmd);
		if (is_huge_zero_page(page)) {
			spin_unlock(ptl);
			ret = 0;
			split_huge_pmd(vma, pmd, address);
			if (pmd_trans_unstable(pmd))
				ret = -EBUSY;
		} else if (flags & FOLL_SPLIT) {
			if (unlikely(!try_get_page(page))) {
				spin_unlock(ptl);
				return ERR_PTR(-ENOMEM);
			}
			spin_unlock(ptl);
			lock_page(page);
			ret = split_huge_page(page);
			unlock_page(page);
			put_page(page);
			if (pmd_none(*pmd))
				return no_page_table(vma, flags);
		} else {	/* flags & FOLL_SPLIT_PMD */
			spin_unlock(ptl);
			split_huge_pmd(vma, pmd, address);
			ret = pte_alloc(mm, pmd) ? -ENOMEM : 0;
		}

		return ret ? ERR_PTR(ret) :
			follow_page_pte(vma, address, pmd, flags, &ctx->pgmap);
	}
	page = follow_trans_huge_pmd(vma, address, pmd, flags);
	spin_unlock(ptl);
	ctx->page_mask = HPAGE_PMD_NR - 1;
	return page;
}

static struct page *follow_pud_mask(struct vm_area_struct *vma,
				    unsigned long address, p4d_t *p4dp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	pud_t *pud;
	spinlock_t *ptl;
	struct page *page;
	struct mm_struct *mm = vma->vm_mm;

	pud = pud_offset(p4dp, address);
	if (pud_none(*pud))
		return no_page_table(vma, flags);
	if (pud_huge(*pud) && is_vm_hugetlb_page(vma)) {
		page = follow_huge_pud(mm, address, pud, flags);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (is_hugepd(__hugepd(pud_val(*pud)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(pud_val(*pud)), flags,
				      PUD_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	if (pud_devmap(*pud)) {
		ptl = pud_lock(mm, pud);
		page = follow_devmap_pud(vma, address, pud, flags, &ctx->pgmap);
		spin_unlock(ptl);
		if (page)
			return page;
	}
	if (unlikely(pud_bad(*pud)))
		return no_page_table(vma, flags);

	return follow_pmd_mask(vma, address, pud, flags, ctx);
}

static struct page *follow_p4d_mask(struct vm_area_struct *vma,
				    unsigned long address, pgd_t *pgdp,
				    unsigned int flags,
				    struct follow_page_context *ctx)
{
	p4d_t *p4d;
	struct page *page;

	p4d = p4d_offset(pgdp, address);
	if (p4d_none(*p4d))
		return no_page_table(vma, flags);
	BUILD_BUG_ON(p4d_huge(*p4d));
	if (unlikely(p4d_bad(*p4d)))
		return no_page_table(vma, flags);

	if (is_hugepd(__hugepd(p4d_val(*p4d)))) {
		page = follow_huge_pd(vma, address,
				      __hugepd(p4d_val(*p4d)), flags,
				      P4D_SHIFT);
		if (page)
			return page;
		return no_page_table(vma, flags);
	}
	return follow_pud_mask(vma, address, p4d, flags, ctx);
}
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 755)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 756) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 757) * follow_page_mask - look up a page descriptor from a user-virtual address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 758) * @vma: vm_area_struct mapping @address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 759) * @address: virtual address to look up
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 760) * @flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 761) * @ctx: contains dev_pagemap for %ZONE_DEVICE memory pinning and a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 762) * pointer to output page_mask
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 763) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 764) * @flags can have FOLL_ flags set, defined in <linux/mm.h>
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 765) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 766) * When getting pages from ZONE_DEVICE memory, the @ctx->pgmap caches
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 767) * the device's dev_pagemap metadata to avoid repeating expensive lookups.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 768) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 769) * On output, the @ctx->page_mask is set according to the size of the page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 770) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 771) * Return: the mapped (struct page *), %NULL if no mapping exists, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 772) * an error pointer if there is a mapping to something not represented
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 773) * by a page descriptor (see also vm_normal_page()).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 774) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 775) static struct page *follow_page_mask(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 776) unsigned long address, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 777) struct follow_page_context *ctx)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 778) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 779) pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 780) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 781) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 782)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 783) ctx->page_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 784)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 785) /* make this handle hugepd */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 786) page = follow_huge_addr(mm, address, flags & FOLL_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 787) if (!IS_ERR(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 788) WARN_ON_ONCE(flags & (FOLL_GET | FOLL_PIN));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 789) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 790) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 791)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 792) pgd = pgd_offset(mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 793)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 794) if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 795) return no_page_table(vma, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 796)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 797) if (pgd_huge(*pgd)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 798) page = follow_huge_pgd(mm, address, pgd, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 799) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 800) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 801) return no_page_table(vma, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 802) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 803) if (is_hugepd(__hugepd(pgd_val(*pgd)))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 804) page = follow_huge_pd(vma, address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 805) __hugepd(pgd_val(*pgd)), flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 806) PGDIR_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 807) if (page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 808) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 809) return no_page_table(vma, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 810) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 811)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 812) return follow_p4d_mask(vma, address, pgd, flags, ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 813) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 814)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 815) struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 816) unsigned int foll_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 817) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 818) struct follow_page_context ctx = { NULL };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 819) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 820)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 821) page = follow_page_mask(vma, address, foll_flags, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 822) if (ctx.pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 823) put_dev_pagemap(ctx.pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 824) return page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 825) }
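
/*
 * Usage sketch (illustrative only, not part of this file's logic): a
 * hypothetical caller that uses follow_page() to look up the page backing
 * one user address. mmap_lock must be held across the call; because
 * FOLL_GET is passed, the caller owns a reference and must put_page() it
 * when finished.
 *
 *	static struct page *example_lookup_user_page(struct mm_struct *mm,
 *						     unsigned long addr)
 *	{
 *		struct vm_area_struct *vma;
 *		struct page *page = NULL;
 *
 *		mmap_read_lock(mm);
 *		vma = find_vma(mm, addr);
 *		if (vma && addr >= vma->vm_start) {
 *			// NULL or an error pointer both mean "no usable page"
 *			page = follow_page(vma, addr, FOLL_GET);
 *			if (IS_ERR_OR_NULL(page))
 *				page = NULL;
 *		}
 *		mmap_read_unlock(mm);
 *		return page;
 *	}
 */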
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 826)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 827) static int get_gate_page(struct mm_struct *mm, unsigned long address,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 828) unsigned int gup_flags, struct vm_area_struct **vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 829) struct page **page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 830) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 831) pgd_t *pgd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 832) p4d_t *p4d;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 833) pud_t *pud;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 834) pmd_t *pmd;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 835) pte_t *pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 836) int ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 837)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 838) /* user gate pages are read-only */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 839) if (gup_flags & FOLL_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 840) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 841) if (address > TASK_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 842) pgd = pgd_offset_k(address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 843) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 844) pgd = pgd_offset_gate(mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 845) if (pgd_none(*pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 846) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 847) p4d = p4d_offset(pgd, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 848) if (p4d_none(*p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 849) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 850) pud = pud_offset(p4d, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 851) if (pud_none(*pud))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 852) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 853) pmd = pmd_offset(pud, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 854) if (!pmd_present(*pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 855) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 856) VM_BUG_ON(pmd_trans_huge(*pmd));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 857) pte = pte_offset_map(pmd, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 858) if (pte_none(*pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 859) goto unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 860) *vma = get_gate_vma(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 861) if (!page)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 862) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 863) *page = vm_normal_page(*vma, address, *pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 864) if (!*page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 865) if ((gup_flags & FOLL_DUMP) || !is_zero_pfn(pte_pfn(*pte)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 866) goto unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 867) *page = pte_page(*pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 868) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 869) if (unlikely(!try_grab_page(*page, gup_flags))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 870) ret = -ENOMEM;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 871) goto unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 872) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 873) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 874) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 875) unmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 876) pte_unmap(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 877) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 878) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 879)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 880) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 881) * mmap_lock must be held on entry. If @locked != NULL and *@flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 882) * does not include FOLL_NOWAIT, the mmap_lock may be released. If it
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 883) * is, *@locked will be set to 0 and -EBUSY returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 884) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 885) static int faultin_page(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 886) unsigned long address, unsigned int *flags, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 887) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 888) unsigned int fault_flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 889) vm_fault_t ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 890)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 891) /* mlock all present pages, but do not fault in new pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 892) if ((*flags & (FOLL_POPULATE | FOLL_MLOCK)) == FOLL_MLOCK)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 893) return -ENOENT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 894) if (*flags & FOLL_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 895) fault_flags |= FAULT_FLAG_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 896) if (*flags & FOLL_REMOTE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 897) fault_flags |= FAULT_FLAG_REMOTE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 898) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 899) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 900) if (*flags & FOLL_NOWAIT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 901) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_RETRY_NOWAIT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 902) if (*flags & FOLL_TRIED) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 903) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 904) * Note: FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_TRIED
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 905) * can co-exist
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 906) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 907) fault_flags |= FAULT_FLAG_TRIED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 908) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 909)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 910) ret = handle_mm_fault(vma, address, fault_flags, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 911) if (ret & VM_FAULT_ERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 912) int err = vm_fault_to_errno(ret, *flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 913)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 914) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 915) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 916) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 917) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 918)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 919) if (ret & VM_FAULT_RETRY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 920) if (locked && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 921) *locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 922) return -EBUSY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 923) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 924)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 925) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 926) * The VM_FAULT_WRITE bit tells us that do_wp_page has broken COW when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 927) * necessary, even if maybe_mkwrite decided not to set pte_write. We
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 928) * can thus safely do subsequent page lookups as if they were reads.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 929) * But only do so when looping for pte_write is futile: in some cases
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 930) * userspace may also want to write to the page it obtained, which a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 931) * read fault here might prevent (a read-only page might get re-COWed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 932) * by a userspace write).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 933) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 934) if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 935) *flags |= FOLL_COW;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 936) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 938)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 939) static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 940) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 941) vm_flags_t vm_flags = vma->vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 942) int write = (gup_flags & FOLL_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 943) int foreign = (gup_flags & FOLL_REMOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 944)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 945) if (vm_flags & (VM_IO | VM_PFNMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 946) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 947)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 948) if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 949) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 950)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 951) if ((gup_flags & FOLL_LONGTERM) && vma_is_fsdax(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 952) return -EOPNOTSUPP;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 953)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 954) if (write) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 955) if (!(vm_flags & VM_WRITE)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 956) if (!(gup_flags & FOLL_FORCE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 957) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 958) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 959) * We used to let the write,force case do COW in a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 960) * VM_MAYWRITE VM_SHARED !VM_WRITE vma, so ptrace could
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 961) * set a breakpoint in a read-only mapping of an
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 962) * executable, without corrupting the file (yet only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 963) * when that file had been opened for writing!).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 964) * Anon pages in shared mappings are surprising: now
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 965) * we just reject that case.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 966) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 967) if (!is_cow_mapping(vm_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 968) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 969) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 970) } else if (!(vm_flags & VM_READ)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 971) if (!(gup_flags & FOLL_FORCE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 972) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 973) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 974) * Is there actually any vma we can reach here which does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 975) * have VM_MAYREAD set?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 976) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 977) if (!(vm_flags & VM_MAYREAD))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 978) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 979) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 980) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 981) * gups are always data accesses, not instruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 982) * fetches, so execute=false here
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 983) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 984) if (!arch_vma_access_permitted(vma, write, false, foreign))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 985) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 986) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 987) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 988)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 989) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 990) * __get_user_pages() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 991) * @mm: mm_struct of target mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 992) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 993) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 994) * @gup_flags: flags modifying pin behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 995) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 996) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 997) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 998) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 999) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1000) * @locked: whether the mmap_lock is still held (see below)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1001) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1002) * Returns either number of pages pinned (which may be less than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1003) * number requested), or an error. Details about the return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1004) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1005) * -- If nr_pages is 0, returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1006) * -- If nr_pages is >0, but no pages were pinned, returns -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1007) * -- If nr_pages is >0, and some pages were pinned, returns the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1008) * pages pinned. Again, this may be less than nr_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1009) * -- 0 return value is possible when the fault would need to be retried.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1010) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1011) * The caller is responsible for releasing returned @pages, via put_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1012) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1013) * @vmas are valid only as long as mmap_lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1014) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1015) * Must be called with mmap_lock held. It may be released. See below.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1016) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1017) * __get_user_pages walks a process's page tables and takes a reference to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1018) * each struct page that each user address corresponds to at a given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1019) * instant. That is, it takes the page that would be accessed if a user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1020) * thread accesses the given user virtual address at that instant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1021) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1022) * This does not guarantee that the page exists in the user mappings when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1023) * __get_user_pages returns, and there may even be a completely different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1024) * page there in some cases (e.g. if mmapped pagecache has been invalidated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1025) * and subsequently re-faulted). However it does guarantee that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1026) * won't be freed completely. And mostly callers simply care that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1027) * contains data that was valid *at some point in time*. Typically, an IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1028) * or similar operation cannot guarantee anything stronger anyway because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1029) * locks can't be held over the syscall boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1030) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1031) * If @gup_flags & FOLL_WRITE == 0, the page must not be written to. If
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1032) * the page is written to, set_page_dirty (or set_page_dirty_lock, as
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1033) * appropriate) must be called after the page is finished with, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1034) * before put_page is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1035) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1036) * If @locked != NULL, *@locked will be set to 0 when mmap_lock is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1037) * released by an up_read(). That can happen if @gup_flags does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1038) * have FOLL_NOWAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1039) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1040) * A caller using such a combination of @locked and @gup_flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1041) * must therefore hold the mmap_lock for reading only, and recognize
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1042) * when it's been released. Otherwise, it must be held for either
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1043) * reading or writing and will not be released.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1044) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1045) * In most cases, get_user_pages or get_user_pages_fast should be used
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1046) * instead of __get_user_pages. __get_user_pages should be used only if
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1047) * you need some special @gup_flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1048) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1049) static long __get_user_pages(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1050) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1051) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1052) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1053) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1054) long ret = 0, i = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1055) struct vm_area_struct *vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1056) struct follow_page_context ctx = { NULL };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1057)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1058) if (!nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1059) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1060)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1061) start = untagged_addr(start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1062)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1063) VM_BUG_ON(!!pages != !!(gup_flags & (FOLL_GET | FOLL_PIN)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1064)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1065) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1066) * If FOLL_FORCE is set then do not force a full fault as the hinting
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1067) * fault information is unrelated to the reference behaviour of a task
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1068) * using the address space
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1069) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1070) if (!(gup_flags & FOLL_FORCE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1071) gup_flags |= FOLL_NUMA;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1072)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1073) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1074) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1075) unsigned int foll_flags = gup_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1076) unsigned int page_increm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1077)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1078) /* first iteration or crossing a vma boundary */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1079) if (!vma || start >= vma->vm_end) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1080) vma = find_extend_vma(mm, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1081) if (!vma && in_gate_area(mm, start)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1082) ret = get_gate_page(mm, start & PAGE_MASK,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1083) gup_flags, &vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1084) pages ? &pages[i] : NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1085) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1086) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1087) ctx.page_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1088) goto next_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1089) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1090)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1091) if (!vma) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1092) ret = -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1093) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1094) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1095) ret = check_vma_flags(vma, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1096) if (ret)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1097) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1098)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1099) if (is_vm_hugetlb_page(vma)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1100) i = follow_hugetlb_page(mm, vma, pages, vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1101) &start, &nr_pages, i,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1102) gup_flags, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1103) if (locked && *locked == 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1104) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1105) * We've got a VM_FAULT_RETRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1106) * and we've lost mmap_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1107) * We must stop here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1108) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1109) BUG_ON(gup_flags & FOLL_NOWAIT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1110) BUG_ON(ret != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1111) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1112) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1113) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1114) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1115) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1116) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1117) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1118) * If we have a pending SIGKILL, don't keep faulting pages and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1119) * potentially allocating memory.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1120) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1121) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1122) ret = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1123) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1124) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1125) cond_resched();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1126)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1127) page = follow_page_mask(vma, start, foll_flags, &ctx);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1128) if (!page) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1129) ret = faultin_page(vma, start, &foll_flags, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1130) switch (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1131) case 0:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1132) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1133) case -EBUSY:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1134) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1135) fallthrough;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1136) case -EFAULT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1137) case -ENOMEM:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1138) case -EHWPOISON:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1139) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1140) case -ENOENT:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1141) goto next_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1142) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1143) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1144) } else if (PTR_ERR(page) == -EEXIST) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1145) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1146) * Proper page table entry exists, but no corresponding
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1147) * struct page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1148) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1149) goto next_page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1150) } else if (IS_ERR(page)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1151) ret = PTR_ERR(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1152) goto out;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1154) if (pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1155) pages[i] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1156) flush_anon_page(vma, page, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1157) flush_dcache_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1158) ctx.page_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1159) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1160) next_page:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1161) if (vmas) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1162) vmas[i] = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1163) ctx.page_mask = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1164) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1165) page_increm = 1 + (~(start >> PAGE_SHIFT) & ctx.page_mask);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1166) if (page_increm > nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1167) page_increm = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1168) i += page_increm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1169) start += page_increm * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1170) nr_pages -= page_increm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1171) } while (nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1172) out:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1173) if (ctx.pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1174) put_dev_pagemap(ctx.pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1175) return i ? i : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1176) }
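
/*
 * Usage sketch (illustrative only, not part of this file): a hypothetical
 * caller of the public get_user_pages() wrapper defined later in this file,
 * following the rules above -- pin with FOLL_WRITE, write through a kernel
 * mapping, mark the page dirty, and only then drop the reference. It assumes
 * the 4-byte value does not cross a page boundary.
 *
 *	static int example_poke_user_page(unsigned long addr, u32 val)
 *	{
 *		struct page *page;
 *		void *kaddr;
 *		long got;
 *
 *		mmap_read_lock(current->mm);
 *		got = get_user_pages(addr & PAGE_MASK, 1, FOLL_WRITE, &page, NULL);
 *		mmap_read_unlock(current->mm);
 *		if (got != 1)
 *			return got < 0 ? got : -EFAULT;
 *
 *		kaddr = kmap(page);
 *		memcpy(kaddr + offset_in_page(addr), &val, sizeof(val));
 *		kunmap(page);
 *
 *		set_page_dirty_lock(page);	// dirty before put_page(), as above
 *		put_page(page);
 *		return 0;
 *	}
 */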
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1178) static bool vma_permits_fault(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1179) unsigned int fault_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1180) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1181) bool write = !!(fault_flags & FAULT_FLAG_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1182) bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1183) vm_flags_t vm_flags = write ? VM_WRITE : VM_READ;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1184)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1185) if (!(vm_flags & vma->vm_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1186) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1187)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1188) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1189) * The architecture might have a hardware protection
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1190) * mechanism other than read/write that can deny access.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1191) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1192) * gup always represents data access, not instruction
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1193) * fetches, so execute=false here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1195) if (!arch_vma_access_permitted(vma, write, false, foreign))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1196) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1197)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1198) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1200)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1201) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1202) * fixup_user_fault() - manually resolve a user page fault
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1203) * @mm: mm_struct of target mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1204) * @address: user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1205) * @fault_flags: flags to pass down to handle_mm_fault()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1206) * @unlocked: set to true if the mmap_lock was unlocked while retrying; may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1207) *		NULL if the caller does not allow retry. If NULL, the caller must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1208) *		guarantee that fault_flags does not contain FAULT_FLAG_ALLOW_RETRY.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1209) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1210) * This is meant to be called in the specific scenario where, for locking
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1211) * reasons, we try to access user memory in atomic context (within a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1212) * pagefault_disable() section), that access returns -EFAULT, and we want to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1213) * resolve the user fault before trying again.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1214) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1215) * Typically this is meant to be used by the futex code.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1216) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1217) * The main difference with get_user_pages() is that this function will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1218) * unconditionally call handle_mm_fault() which will in turn perform all the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1219) * necessary SW fixup of the dirty and young bits in the PTE, while
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1220) * get_user_pages() only guarantees to update these in the struct page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1221) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1222) * This is important for some architectures where those bits also gate the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1223) * access permission to the page because they are maintained in software. On
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1224) * such architectures, gup() will not be enough to make a subsequent access
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1225) * succeed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1226) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1227) * This function will not return with an unlocked mmap_lock, so it does not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1228) * have the same semantics wrt the @mm->mmap_lock as filemap_fault() does.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1229) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1230) int fixup_user_fault(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1231) unsigned long address, unsigned int fault_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1232) bool *unlocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1233) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1234) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1235) vm_fault_t ret, major = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1236)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1237) address = untagged_addr(address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1238)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1239) if (unlocked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1240) fault_flags |= FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1241)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1242) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1243) vma = find_extend_vma(mm, address);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1244) if (!vma || address < vma->vm_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1245) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1246)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1247) if (!vma_permits_fault(vma, fault_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1248) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1249)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1250) if ((fault_flags & FAULT_FLAG_KILLABLE) &&
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1251) fatal_signal_pending(current))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1252) return -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1253)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1254) ret = handle_mm_fault(vma, address, fault_flags, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1255) major |= ret & VM_FAULT_MAJOR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1256) if (ret & VM_FAULT_ERROR) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1257) int err = vm_fault_to_errno(ret, 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1258)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1259) if (err)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1260) return err;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1261) BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1262) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1263)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1264) if (ret & VM_FAULT_RETRY) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1265) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1266) *unlocked = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1267) fault_flags |= FAULT_FLAG_TRIED;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1268) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1269) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1270)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1271) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1272) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1273) EXPORT_SYMBOL_GPL(fixup_user_fault);
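
/*
 * Usage sketch (illustrative only, not part of this file): the futex-style
 * pattern described above -- try the access with page faults disabled, and
 * on failure resolve the fault with fixup_user_fault() before retrying. The
 * helper name is hypothetical; a write access would pass FAULT_FLAG_WRITE
 * instead of 0.
 *
 *	static int example_read_user_u32(u32 __user *uaddr, u32 *val)
 *	{
 *		struct mm_struct *mm = current->mm;
 *		bool unlocked = false;
 *		int ret;
 *
 *	again:
 *		pagefault_disable();
 *		ret = __get_user(*val, uaddr);
 *		pagefault_enable();
 *		if (!ret)
 *			return 0;
 *
 *		mmap_read_lock(mm);
 *		ret = fixup_user_fault(mm, (unsigned long)uaddr, 0, &unlocked);
 *		mmap_read_unlock(mm);
 *		if (ret)
 *			return ret;
 *		goto again;
 *	}
 */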
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1274)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1275) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1276) * Please note that this function, unlike __get_user_pages(), will not return
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1277) * 0 for nr_pages > 0 unless FOLL_NOWAIT is used.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1278) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1279) static __always_inline long __get_user_pages_locked(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1280) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1281) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1282) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1283) struct vm_area_struct **vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1284) int *locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1285) unsigned int flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1286) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1287) long ret, pages_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1288) bool lock_dropped;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1289)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1290) if (locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1291) /* if VM_FAULT_RETRY can be returned, vmas become invalid */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1292) BUG_ON(vmas);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1293) /* check caller initialized locked */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1294) BUG_ON(*locked != 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1297) if (flags & FOLL_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1298) atomic_set(&mm->has_pinned, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1299)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1300) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1301) * FOLL_PIN and FOLL_GET are mutually exclusive. Traditional behavior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1302) * is to set FOLL_GET if the caller wants pages[] filled in (but has
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1303) * carelessly failed to specify FOLL_GET), so keep doing that, but only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1304) * for FOLL_GET, not for the newer FOLL_PIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1305) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1306) * FOLL_PIN always expects pages to be non-null, but no need to assert
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1307) * that here, as any failures will be obvious enough.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1308) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1309) if (pages && !(flags & FOLL_PIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1310) flags |= FOLL_GET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1311)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1312) pages_done = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1313) lock_dropped = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1314) for (;;) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1315) ret = __get_user_pages(mm, start, nr_pages, flags, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1316) vmas, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1317) if (!locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1318) /* VM_FAULT_RETRY couldn't trigger, bypass */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1319) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1320)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1321) /* VM_FAULT_RETRY cannot return errors */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1322) if (!*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1323) BUG_ON(ret < 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1324) BUG_ON(ret >= nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1325) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1326)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1327) if (ret > 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1328) nr_pages -= ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1329) pages_done += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1330) if (!nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1331) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1332) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1333) if (*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1334) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1335) * VM_FAULT_RETRY didn't trigger or it was a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1336) * FOLL_NOWAIT.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1337) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1338) if (!pages_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1339) pages_done = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1340) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1341) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1342) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1343) * VM_FAULT_RETRY triggered, so seek to the faulting offset.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1344) * For the prefault case (!pages) we only update counts.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1345) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1346) if (likely(pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1347) pages += ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1348) start += ret << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1349) lock_dropped = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1350)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1351) retry:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1352) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1353) * Repeat on the address that fired VM_FAULT_RETRY
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1354) * with both FAULT_FLAG_ALLOW_RETRY and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1355) * FAULT_FLAG_TRIED. Note that GUP can be interrupted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1356) * by fatal signals, so we need to check for them before
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1357) * trying again; otherwise this can loop forever.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1358) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1359)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1360) if (fatal_signal_pending(current)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1361) if (!pages_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1362) pages_done = -EINTR;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1363) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1364) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1365)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1366) ret = mmap_read_lock_killable(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1367) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1368) BUG_ON(ret > 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1369) if (!pages_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1370) pages_done = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1371) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1372) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1373)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1374) *locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1375) ret = __get_user_pages(mm, start, 1, flags | FOLL_TRIED,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1376) pages, NULL, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1377) if (!*locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1378) /* Continue to retry until we succeed */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1379) BUG_ON(ret != 0);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1380) goto retry;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1381) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1382) if (ret != 1) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1383) BUG_ON(ret > 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1384) if (!pages_done)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1385) pages_done = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1386) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1387) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1388) nr_pages--;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1389) pages_done++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1390) if (!nr_pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1391) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1392) if (likely(pages))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1393) pages++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1394) start += PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1395) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1396) if (lock_dropped && *locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1397) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1398) * We must let the caller know we temporarily dropped the lock
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1399) * and so the critical section protected by it was lost.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1400) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1401) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1402) *locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1403) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1404) return pages_done;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1405) }
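
/*
 * Usage sketch (illustrative only, not part of this file): the "locked"
 * calling convention implemented by the helper above, as seen through the
 * public get_user_pages_locked() wrapper defined later in this file. The
 * caller takes mmap_lock but must re-check *locked afterwards, because the
 * lock may have been dropped on its behalf (in which case *locked is 0 and
 * the caller must not unlock again).
 *
 *	static long example_pin_for_write(unsigned long start, unsigned long nr,
 *					  struct page **pages)
 *	{
 *		int locked = 1;
 *		long ret;
 *
 *		mmap_read_lock(current->mm);
 *		ret = get_user_pages_locked(start, nr, FOLL_WRITE, pages, &locked);
 *		if (locked)
 *			mmap_read_unlock(current->mm);
 *		// on success, pages[0..ret) each hold a reference (put_page() later)
 *		return ret;
 *	}
 */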
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1406)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1407) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1408) * populate_vma_page_range() - populate a range of pages in the vma.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1409) * @vma: target vma
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1410) * @start: start address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1411) * @end: end address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1412) * @locked: whether the mmap_lock is still held
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1413) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1414) * This takes care of mlocking the pages too if VM_LOCKED is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1415) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1416) * Return either the number of pages pinned in the vma, or a negative error
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1417) * code on error.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1418) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1419) * vma->vm_mm->mmap_lock must be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1420) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1421) * If @locked is NULL, it may be held for read or write and will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1422) * be unperturbed.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1423) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1424) * If @locked is non-NULL, it must be held for read only and may be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1425) * released. If it's released, *@locked will be set to 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1426) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1427) long populate_vma_page_range(struct vm_area_struct *vma,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1428) unsigned long start, unsigned long end, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1429) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1430) struct mm_struct *mm = vma->vm_mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1431) unsigned long nr_pages = (end - start) / PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1432) int gup_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1433)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1434) VM_BUG_ON(start & ~PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1435) VM_BUG_ON(end & ~PAGE_MASK);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1436) VM_BUG_ON_VMA(start < vma->vm_start, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1437) VM_BUG_ON_VMA(end > vma->vm_end, vma);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1438) mmap_assert_locked(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1439)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1440) gup_flags = FOLL_TOUCH | FOLL_POPULATE | FOLL_MLOCK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1441) if (vma->vm_flags & VM_LOCKONFAULT)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1442) gup_flags &= ~FOLL_POPULATE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1443) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1444) * We want to touch writable mappings with a write fault in order
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1445) * to break COW, except for shared mappings because these don't COW
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1446) * and we would not want to dirty them for nothing.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1447) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1448) if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1449) gup_flags |= FOLL_WRITE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1450)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1451) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1452) * We want mlock to succeed for regions that have any permissions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1453) * other than PROT_NONE.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1454) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1455) if (vma_is_accessible(vma))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1456) gup_flags |= FOLL_FORCE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1457)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1458) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1459) * We made sure addr is within a VMA, so the following will
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1460) * not result in a stack expansion that recurses back here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1461) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1462) return __get_user_pages(mm, start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1463) NULL, NULL, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1464) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1465)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1466) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1467) * __mm_populate - populate and/or mlock pages within a range of address space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1468) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1469) * This is used to implement mlock() and the MAP_POPULATE / MAP_LOCKED mmap
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1470) * flags. VMAs must be already marked with the desired vm_flags, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1471) * mmap_lock must not be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1472) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1473) int __mm_populate(unsigned long start, unsigned long len, int ignore_errors)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1474) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1475) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1476) unsigned long end, nstart, nend;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1477) struct vm_area_struct *vma = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1478) int locked = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1479) long ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1481) end = start + len;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1482)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1483) for (nstart = start; nstart < end; nstart = nend) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1484) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1485) * We want to fault in pages for [nstart; end) address range.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1486) * Find first corresponding VMA.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1488) if (!locked) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1489) locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1490) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1491) vma = find_vma(mm, nstart);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1492) } else if (nstart >= vma->vm_end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1493) vma = vma->vm_next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1494) if (!vma || vma->vm_start >= end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1495) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1496) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1497) * Set [nstart; nend) to intersection of desired address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1498) * range with the first VMA. Also, skip undesirable VMA types.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1499) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1500) nend = min(end, vma->vm_end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1501) if (vma->vm_flags & (VM_IO | VM_PFNMAP))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1502) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1503) if (nstart < vma->vm_start)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1504) nstart = vma->vm_start;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1505) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1506) * Now fault in a range of pages. populate_vma_page_range()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1507) * double checks the vma flags, so that it won't mlock pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1508) * if the vma was already munlocked.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1509) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1510) ret = populate_vma_page_range(vma, nstart, nend, &locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1511) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1512) if (ignore_errors) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1513) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1514) continue; /* continue at next VMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1515) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1516) break;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1517) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1518) nend = nstart + ret * PAGE_SIZE;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1519) ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1520) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1521) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1522) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1523) return ret; /* 0 or negative error code */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1524) }
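
/*
 * Usage sketch (illustrative only, not part of this file): how an
 * mlock()-style path ends up here -- the VMAs are marked VM_LOCKED under the
 * write lock, the lock is dropped, and only then is the range faulted in.
 * example_mark_vmas_locked() is a hypothetical stand-in for that first step;
 * "start" and "len" are assumed page-aligned.
 *
 *	static int example_lock_range(unsigned long start, size_t len)
 *	{
 *		int error;
 *
 *		mmap_write_lock(current->mm);
 *		error = example_mark_vmas_locked(start, len);	// hypothetical step
 *		mmap_write_unlock(current->mm);
 *		if (error)
 *			return error;
 *
 *		// __mm_populate() takes and releases mmap_lock by itself;
 *		// 0 means "do not ignore errors"
 *		return __mm_populate(start, len, 0);
 *	}
 */
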
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1525) #else /* CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1526) static long __get_user_pages_locked(struct mm_struct *mm, unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1527) unsigned long nr_pages, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1528) struct vm_area_struct **vmas, int *locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1529) unsigned int foll_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1530) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1531) struct vm_area_struct *vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1532) unsigned long vm_flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1533) int i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1535) /* calculate required read or write permissions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1536) * If FOLL_FORCE is set, we only require the "MAY" flags.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1537) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1538) vm_flags = (foll_flags & FOLL_WRITE) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1539) (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1540) vm_flags &= (foll_flags & FOLL_FORCE) ?
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1541) (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1542)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1543) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1544) vma = find_vma(mm, start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1545) if (!vma)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1546) goto finish_or_fault;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1548) /* protect what we can, including chardevs */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1549) if ((vma->vm_flags & (VM_IO | VM_PFNMAP)) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1550) !(vm_flags & vma->vm_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1551) goto finish_or_fault;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1552)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1553) if (pages) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1554) pages[i] = virt_to_page(start);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1555) if (pages[i])
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1556) get_page(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1557) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1558) if (vmas)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1559) vmas[i] = vma;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1560) start = (start + PAGE_SIZE) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1563) return i;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1564)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1565) finish_or_fault:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1566) return i ? : -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1567) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1568) #endif /* !CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1569)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1570) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1571) * get_dump_page() - pin user page in memory while writing it to core dump
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1572) * @addr: user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1573) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1574) * Returns struct page pointer of user page pinned for dump,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1575) * to be freed afterwards by put_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1576) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1577) * Returns NULL on any kind of failure - a hole must then be inserted into
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1578) * the corefile, to preserve alignment with its headers; and also returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1579) * NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1580) * allowing a hole to be left in the corefile to save disk space.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1581) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1582) * Called without mmap_lock (takes and releases the mmap_lock by itself).
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1583) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1584) #ifdef CONFIG_ELF_CORE
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1585) struct page *get_dump_page(unsigned long addr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1586) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1587) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1588) struct page *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1589) int locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1590) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1591)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1592) if (mmap_read_lock_killable(mm))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1593) return NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1594) ret = __get_user_pages_locked(mm, addr, 1, &page, NULL, &locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1595) FOLL_FORCE | FOLL_DUMP | FOLL_GET);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1596) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1597) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1598) return (ret == 1) ? page : NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1599) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1600) #endif /* CONFIG_ELF_CORE */
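
/*
 * Editorial illustration (not part of the original source): a minimal sketch
 * of how a core-dump writer might consume get_dump_page(), per the kerneldoc
 * above. The emit_page()/emit_hole() helpers are hypothetical placeholders
 * for whatever the dump code uses to write page data and aligned holes; the
 * in-tree ELF core dump code differs in detail.
 *
 *	unsigned long addr;
 *
 *	for (addr = start; addr < end; addr += PAGE_SIZE) {
 *		struct page *page = get_dump_page(addr);
 *
 *		if (page) {
 *			emit_page(cprm, page);
 *			put_page(page);
 *		} else {
 *			emit_hole(cprm, PAGE_SIZE);
 *		}
 *	}
 */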
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1601)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1602) #ifdef CONFIG_CMA
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1603) static long check_and_migrate_cma_pages(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1604) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1605) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1606) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1607) struct vm_area_struct **vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1608) unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1609) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1610) unsigned long i, isolation_error_count;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1611) bool drain_allow;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1612) LIST_HEAD(cma_page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1613) long ret = nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1614) struct page *prev_head, *head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1615) struct migration_target_control mtc = {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1616) .nid = NUMA_NO_NODE,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1617) .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1618) };
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1619)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1620) check_again:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1621) prev_head = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1622) isolation_error_count = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1623) drain_allow = true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1624) for (i = 0; i < nr_pages; i++) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1625) head = compound_head(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1626) if (head == prev_head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1627) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1628) prev_head = head;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1629) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1630) * If we get pages from the CMA zone, since we are going to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1631) * be pinning these entries, we might as well move them out
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1632) * of the CMA zone if possible.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1633) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1634) if (is_migrate_cma_page(head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1635) if (PageHuge(head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1636) if (!isolate_huge_page(head, &cma_page_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1637) isolation_error_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1638) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1639) if (!PageLRU(head) && drain_allow) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1640) lru_add_drain_all();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1641) drain_allow = false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1642) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1643)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1644) if (isolate_lru_page(head)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1645) isolation_error_count++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1646) continue;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1647) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1648) list_add_tail(&head->lru, &cma_page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1649) mod_node_page_state(page_pgdat(head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1650) NR_ISOLATED_ANON +
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1651) page_is_file_lru(head),
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1652) thp_nr_pages(head));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1653) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1654) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1655) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1656)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1657) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1658) * If the list is empty and there were no isolation errors, all pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1659) * are already in the correct zone.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1660) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1661) if (list_empty(&cma_page_list) && !isolation_error_count)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1662) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1663)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1664) if (!list_empty(&cma_page_list)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1665) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1666) * drop the above get_user_pages reference.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1667) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1668) if (gup_flags & FOLL_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1669) unpin_user_pages(pages, nr_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1670) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1671) for (i = 0; i < nr_pages; i++)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1672) put_page(pages[i]);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1673)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1674) ret = migrate_pages(&cma_page_list, alloc_migration_target,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1675) NULL, (unsigned long)&mtc, MIGRATE_SYNC,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1676) MR_CONTIG_RANGE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1677) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1678) if (!list_empty(&cma_page_list))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1679) putback_movable_pages(&cma_page_list);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1680) return ret > 0 ? -ENOMEM : ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1681) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1682)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1683) /* We unpinned pages before migration, pin them again */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1684) ret = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1685) NULL, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1686) if (ret <= 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1687) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1688) nr_pages = ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1689) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1690)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1691) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1692) * check again because pages were unpinned, and we also might have
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1693) * had isolation errors and need more pages to migrate.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1694) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1695) goto check_again;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1696) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1697) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1698) static long check_and_migrate_cma_pages(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1699) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1700) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1701) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1702) struct vm_area_struct **vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1703) unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1704) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1705) return nr_pages;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1706) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1707) #endif /* CONFIG_CMA */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1708)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1709) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1710) * __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1711) * allows us to process the FOLL_LONGTERM flag.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1712) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1713) static long __gup_longterm_locked(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1714) unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1715) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1716) struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1717) struct vm_area_struct **vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1718) unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1719) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1720) unsigned long flags = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1721) long rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1722)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1723) if (gup_flags & FOLL_LONGTERM)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1724) flags = memalloc_nocma_save();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1725)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1726) rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1727) gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1728)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1729) if (gup_flags & FOLL_LONGTERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1730) if (rc > 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1731) rc = check_and_migrate_cma_pages(mm, start, rc, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1732) vmas, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1733) memalloc_nocma_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1734) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1735) return rc;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1736) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1737)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1738) static bool is_valid_gup_flags(unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1739) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1740) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1741) * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1742) * never directly by the caller, so enforce that with an assertion:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1744) if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1745) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1746) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1747) * FOLL_PIN is a prerequisite to FOLL_LONGTERM. Another way of saying
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1748) * that is, FOLL_LONGTERM is a specific, more restrictive case of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1749) * FOLL_PIN.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1750) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1751) if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1752) return false;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1753)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1754) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1755) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1757) #ifdef CONFIG_MMU
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1758) static long __get_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1759) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1760) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1761) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1762) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1763) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1764) * Parts of FOLL_LONGTERM behavior are incompatible with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1765) * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1766) * vmas. However, this only comes up if locked is set, and there are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1767) * callers that do request FOLL_LONGTERM, but do not set locked. So,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1768) * allow what we can.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1769) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1770) if (gup_flags & FOLL_LONGTERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1771) if (WARN_ON_ONCE(locked))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1772) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1773) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1774) * This will check the vmas (even if our vmas arg is NULL)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1775) * and return -ENOTSUPP if DAX isn't allowed in this case:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1776) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1777) return __gup_longterm_locked(mm, start, nr_pages, pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1778) vmas, gup_flags | FOLL_TOUCH |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1779) FOLL_REMOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1780) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1781)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1782) return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1783) locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1784) gup_flags | FOLL_TOUCH | FOLL_REMOTE);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1785) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1786)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1787) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1788) * get_user_pages_remote() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1789) * @mm: mm_struct of target mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1790) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1791) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1792) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1793) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1794) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1795) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1796) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1797) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1798) * @locked: pointer to lock flag indicating whether lock is held and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1799) * subsequently whether VM_FAULT_RETRY functionality can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1800) * utilised. Lock must initially be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1801) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1802) * Returns either the number of pages pinned (which may be less than the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1803) * number requested), or an error. Details about the return value:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1804) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1805) * -- If nr_pages is 0, returns 0.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1806) * -- If nr_pages is >0, but no pages were pinned, returns -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1807) * -- If nr_pages is >0, and some pages were pinned, returns the number of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1808) * pages pinned. Again, this may be less than nr_pages.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1809) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1810) * The caller is responsible for releasing returned @pages, via put_page().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1811) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1812) * @vmas are valid only as long as mmap_lock is held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1813) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1814) * Must be called with mmap_lock held for read or write.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1815) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1816) * get_user_pages_remote walks a process's page tables and takes a reference
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1817) * to each struct page that each user address corresponds to at a given
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1818) * instant. That is, it takes the page that would be accessed if a user
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1819) * thread accesses the given user virtual address at that instant.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1820) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1821) * This does not guarantee that the page exists in the user mappings when
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1822) * get_user_pages_remote returns, and there may even be a completely different
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1823) * page there in some cases (e.g. if mmapped pagecache has been invalidated
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1824) * and subsequently re-faulted). However, it does guarantee that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1825) * won't be freed completely. And most callers simply care that the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1826) * contains data that was valid *at some point in time*. Typically, an IO
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1827) * or similar operation cannot guarantee anything stronger anyway because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1828) * locks can't be held over the syscall boundary.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1829) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1830) * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1831) * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1832) * be called after the page is finished with, and before put_page is called.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1833) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1834) * get_user_pages_remote is typically used for fewer-copy IO operations,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1835) * to get a handle on the memory by some means other than accesses
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1836) * via the user virtual addresses. The pages may be submitted for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1837) * DMA to devices or accessed via their kernel linear mapping (via the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1838) * kmap APIs). Care should be taken to use the correct cache flushing APIs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1839) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1840) * See also get_user_pages_fast, for performance critical applications.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1841) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1842) * get_user_pages_remote should be phased out in favor of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1843) * get_user_pages_locked|unlocked or get_user_pages_fast. Nothing
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1844) * should use get_user_pages_remote because it cannot pass
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1845) * FAULT_FLAG_ALLOW_RETRY to handle_mm_fault.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1846) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1847) long get_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1848) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1849) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1850) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1851) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1852) if (!is_valid_gup_flags(gup_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1853) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1854)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1855) return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1856) pages, vmas, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1857) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1858) EXPORT_SYMBOL(get_user_pages_remote);
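
/*
 * Editorial illustration (not part of the original source): a minimal sketch
 * of the calling pattern described in the kerneldoc above, for a caller that
 * already holds a reference on a foreign @mm and wants to modify one of its
 * pages. "buf" and "len" (len <= PAGE_SIZE) are placeholder caller-owned
 * variables; error handling and page-offset handling are elided.
 *
 *	struct page *page;
 *	long pinned;
 *
 *	mmap_read_lock(mm);
 *	pinned = get_user_pages_remote(mm, addr, 1, FOLL_WRITE, &page,
 *				       NULL, NULL);
 *	mmap_read_unlock(mm);
 *	if (pinned == 1) {
 *		void *kaddr = kmap(page);
 *
 *		memcpy(kaddr, buf, len);
 *		kunmap(page);
 *		set_page_dirty_lock(page);
 *		put_page(page);
 *	}
 */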
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1859)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1860) #else /* CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1861) long get_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1862) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1863) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1864) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1865) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1866) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1868)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1869) static long __get_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1870) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1871) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1872) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1873) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1874) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1875) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1876) #endif /* !CONFIG_MMU */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1877)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1878) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1879) * get_user_pages() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1880) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1881) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1882) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1883) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1884) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1885) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1886) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1887) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1888) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1889) * This is the same as get_user_pages_remote(), just with a less-flexible
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1890) * calling convention where we assume that the mm being operated on belongs to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1891) * the current task, and doesn't allow passing of a locked parameter. We also
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1892) * obviously don't pass FOLL_REMOTE in here.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1893) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1894) long get_user_pages(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1895) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1896) struct vm_area_struct **vmas)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1898) if (!is_valid_gup_flags(gup_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1899) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1900)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1901) return __gup_longterm_locked(current->mm, start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1902) pages, vmas, gup_flags | FOLL_TOUCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1903) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1904) EXPORT_SYMBOL(get_user_pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1905)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1906) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1907) * get_user_pages_locked() is suitable to replace the form:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1908) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1909) * mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1910) * do_something()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1911) * get_user_pages(..., pages, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1912) * mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1913) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1914) * to:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1915) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1916) * int locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1917) * mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1918) * do_something()
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1919) * get_user_pages_locked(..., pages, &locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1920) * if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1921) * mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1922) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1923) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1924) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1925) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1926) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1927) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1928) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1929) * @locked: pointer to lock flag indicating whether lock is held and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1930) * subsequently whether VM_FAULT_RETRY functionality can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1931) * utilised. Lock must initially be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1932) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1933) * We can leverage the VM_FAULT_RETRY functionality in the page fault
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1934) * paths better by using either get_user_pages_locked() or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1935) * get_user_pages_unlocked().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1936) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1937) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1938) long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1939) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1940) int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1941) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1942) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1943) * FIXME: Current FOLL_LONGTERM behavior is incompatible with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1944) * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1945) * vmas. As there are no users of this flag in this call, we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1946) * disallow this option for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1947) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1948) if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1949) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1950) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1951) * FOLL_PIN must only be set internally by the pin_user_pages*() APIs,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1952) * never directly by the caller, so enforce that:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1953) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1954) if (WARN_ON_ONCE(gup_flags & FOLL_PIN))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1955) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1957) return __get_user_pages_locked(current->mm, start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1958) pages, NULL, locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1959) gup_flags | FOLL_TOUCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1960) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1961) EXPORT_SYMBOL(get_user_pages_locked);
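
/*
 * Editorial illustration (not part of the original source): the replacement
 * pattern from the comment above, written out against this function's actual
 * signature (it always operates on current->mm and takes no mm argument).
 * "pages" and "npages" are placeholder caller-owned variables.
 *
 *	int locked = 1;
 *	long got;
 *
 *	mmap_read_lock(current->mm);
 *	got = get_user_pages_locked(start, npages, FOLL_WRITE, pages, &locked);
 *	if (locked)
 *		mmap_read_unlock(current->mm);
 *
 * If mmap_lock had to be dropped while faulting pages in, "locked" is cleared
 * on return and the lock is no longer held, so the "if (locked)" test skips
 * the extra unlock.
 */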
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1962)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1963) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1964) * get_user_pages_unlocked() is suitable to replace the form:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1965) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1966) * mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1967) * get_user_pages(..., pages, NULL);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1968) * mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1969) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1970) * with:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1971) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1972) * get_user_pages_unlocked(..., pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1973) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1974) * It is functionally equivalent to get_user_pages_fast, so
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1975) * get_user_pages_fast should be used instead if specific gup_flags
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1976) * (e.g. FOLL_FORCE) are not required.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1977) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1978) long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1979) struct page **pages, unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1980) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1981) struct mm_struct *mm = current->mm;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1982) int locked = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1983) long ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1984)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1985) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1986) * FIXME: Current FOLL_LONGTERM behavior is incompatible with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1987) * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1988) * vmas. As there are no users of this flag in this call, we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1989) * disallow this option for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1990) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1991) if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1992) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1993)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1994) mmap_read_lock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1995) ret = __get_user_pages_locked(mm, start, nr_pages, pages, NULL,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1996) &locked, gup_flags | FOLL_TOUCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1997) if (locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1998) mmap_read_unlock(mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 1999) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2000) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2001) EXPORT_SYMBOL(get_user_pages_unlocked);
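
/*
 * Editorial illustration (not part of the original source): a minimal sketch
 * of the single-call form described above, for a caller that needs a specific
 * flag such as FOLL_FORCE and therefore does not simply use
 * get_user_pages_fast(). mmap_lock handling is entirely internal; "pages" and
 * "npages" are placeholder caller-owned variables.
 *
 *	long i, got;
 *
 *	got = get_user_pages_unlocked(start, npages, pages, FOLL_FORCE);
 *	if (got < 0)
 *		return got;
 *	... copy data out of the pinned pages ...
 *	for (i = 0; i < got; i++)
 *		put_page(pages[i]);
 */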
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2002)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2003) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2004) * Fast GUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2005) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2006) * get_user_pages_fast attempts to pin user pages by walking the page
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2007) * tables directly and avoids taking locks. Thus the walker needs to be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2008) * protected from page table pages being freed from under it, and should
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2009) * block any THP splits.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2010) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2011) * One way to achieve this is to have the walker disable interrupts, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2012) * rely on IPIs from the TLB flushing code blocking before the page table
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2013) * pages are freed. This is unsuitable for architectures that do not need
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2014) * to broadcast an IPI when invalidating TLBs.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2015) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2016) * Another way to achieve this is to batch up the page-table-containing pages
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2017) * belonging to more than one mm_user, then rcu_sched a callback to free those
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2018) * pages. Disabling interrupts will allow the fast_gup walker to both block
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2019) * the rcu_sched callback, and an IPI that we broadcast for splitting THPs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2020) * (which is a relatively rare event). The code below adopts this strategy.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2021) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2022) * Before activating this code, please be aware that the following assumptions
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2023) * are currently made:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2024) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2025) * *) Either MMU_GATHER_RCU_TABLE_FREE is enabled, and tlb_remove_table() is used to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2026) * free pages containing page tables or TLB flushing requires IPI broadcast.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2027) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2028) * *) ptes can be read atomically by the architecture.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2029) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2030) * *) access_ok is sufficient to validate userspace address ranges.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2031) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2032) * The last two assumptions can be relaxed by the addition of helper functions.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2033) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2034) * This code is based heavily on the PowerPC implementation by Nick Piggin.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2035) */
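
/*
 * Editorial illustration (not part of the original source): the protection
 * scheme described above reduces to the pattern below, where
 * walk_page_tables_lockless() is a hypothetical stand-in for the gup_*_range()
 * walkers defined in this file. With interrupts disabled on the local CPU,
 * neither the rcu_sched callback that frees page-table pages nor a broadcast
 * IPI can complete until the walk is done, which is what keeps the page tables
 * and THPs stable underneath the walker.
 *
 *	unsigned long flags;
 *	int nr_pinned = 0;
 *
 *	local_irq_save(flags);
 *	walk_page_tables_lockless(start, end, gup_flags, pages, &nr_pinned);
 *	local_irq_restore(flags);
 */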
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2036) #ifdef CONFIG_HAVE_FAST_GUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2037) #ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2038)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2039) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2040) * WARNING: only to be used in the get_user_pages_fast() implementation.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2041) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2042) * With get_user_pages_fast(), we walk down the pagetables without taking any
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2043) * locks. For this we would like to load the pointers atomically, but sometimes
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2044) * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2045) * we do have is the guarantee that a PTE will only either go from not present
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2046) * to present, or present to not present, or both -- it will not switch to a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2047) * completely different present page without a TLB flush in between; something
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2048) * that we are blocking by holding interrupts off.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2049) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2050) * Setting ptes from not present to present goes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2051) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2052) * ptep->pte_high = h;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2053) * smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2054) * ptep->pte_low = l;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2055) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2056) * And present to not present goes:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2057) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2058) * ptep->pte_low = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2059) * smp_wmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2060) * ptep->pte_high = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2061) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2062) * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2063) * We load pte_high *after* loading pte_low, which ensures we don't see an older
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2064) * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2065) * picked up a changed pte_high. We might have gotten rubbish values from
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2066) * pte_low and pte_high, but we are guaranteed that pte_low will not have the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2067) * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2068) * operates on present ptes we're safe.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2069) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2070) static inline pte_t gup_get_pte(pte_t *ptep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2071) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2072) pte_t pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2073)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2074) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2075) pte.pte_low = ptep->pte_low;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2076) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2077) pte.pte_high = ptep->pte_high;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2078) smp_rmb();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2079) } while (unlikely(pte.pte_low != ptep->pte_low));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2080)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2081) return pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2082) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2083) #else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2084) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2085) * We require that the PTE can be read atomically.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2086) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2087) static inline pte_t gup_get_pte(pte_t *ptep)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2088) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2089) return ptep_get(ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2090) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2091) #endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2092)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2093) static void __maybe_unused undo_dev_pagemap(int *nr, int nr_start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2094) unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2095) struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2096) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2097) while ((*nr) - nr_start) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2098) struct page *page = pages[--(*nr)];
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2099)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2100) ClearPageReferenced(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2101) if (flags & FOLL_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2102) unpin_user_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2103) else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2104) put_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2105) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2106) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2107)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2108) #ifdef CONFIG_ARCH_HAS_PTE_SPECIAL
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2109) static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2110) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2111) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2112) struct dev_pagemap *pgmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2113) int nr_start = *nr, ret = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2114) pte_t *ptep, *ptem;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2115)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2116) ptem = ptep = pte_offset_map(&pmd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2117) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2118) pte_t pte = gup_get_pte(ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2119) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2120)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2121) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2122) * Similar to the PMD case below, NUMA hinting must take the slow
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2123) * path using the pte_protnone check.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2124) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2125) if (pte_protnone(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2126) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2127)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2128) if (!pte_access_permitted(pte, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2129) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2130)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2131) if (pte_devmap(pte)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2132) if (unlikely(flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2133) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2134)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2135) pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2136) if (unlikely(!pgmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2137) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2138) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2139) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2140) } else if (pte_special(pte))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2141) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2142)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2143) VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2144) page = pte_page(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2145)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2146) head = try_grab_compound_head(page, 1, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2147) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2148) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2149)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2150) if (unlikely(pte_val(pte) != pte_val(*ptep))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2151) put_compound_head(head, 1, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2152) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2153) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2154)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2155) VM_BUG_ON_PAGE(compound_head(page) != head, page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2156)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2157) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2158) * We need to make the page accessible if and only if we are
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2159) * going to access its content (the FOLL_PIN case). Please
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2160) * see Documentation/core-api/pin_user_pages.rst for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2161) * details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2162) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2163) if (flags & FOLL_PIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2164) ret = arch_make_page_accessible(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2165) if (ret) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2166) unpin_user_page(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2167) goto pte_unmap;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2168) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2169) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2170) SetPageReferenced(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2171) pages[*nr] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2172) (*nr)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2173)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2174) } while (ptep++, addr += PAGE_SIZE, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2175)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2176) ret = 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2177)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2178) pte_unmap:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2179) if (pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2180) put_dev_pagemap(pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2181) pte_unmap(ptem);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2182) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2183) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2184) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2185)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2186) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2187) * If we can't determine whether or not a pte is special, then fail immediately
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2188) * for ptes. Note, we can still pin HugeTLB and THP as these are guaranteed not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2189) * to be special.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2190) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2191) * For a futex to be placed on a THP tail page, get_futex_key requires a
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2192) * get_user_pages_fast_only implementation that can pin pages. Thus it's still
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2193) * useful to have gup_huge_pmd even if we can't operate on ptes.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2194) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2195) static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2196) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2197) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2198) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2199) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2200) #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2201)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2202) #if defined(CONFIG_ARCH_HAS_PTE_DEVMAP) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2203) static int __gup_device_huge(unsigned long pfn, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2204) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2205) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2206) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2207) int nr_start = *nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2208) struct dev_pagemap *pgmap = NULL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2209)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2210) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2211) struct page *page = pfn_to_page(pfn);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2212)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2213) pgmap = get_dev_pagemap(pfn, pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2214) if (unlikely(!pgmap)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2215) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2216) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2217) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2218) SetPageReferenced(page);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2219) pages[*nr] = page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2220) if (unlikely(!try_grab_page(page, flags))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2221) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2222) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2223) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2224) (*nr)++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2225) pfn++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2226) } while (addr += PAGE_SIZE, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2227)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2228) if (pgmap)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2229) put_dev_pagemap(pgmap);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2230) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2231) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2232)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2233) static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2234) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2235) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2236) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2237) unsigned long fault_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2238) int nr_start = *nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2239)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2240) fault_pfn = pmd_pfn(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2241) if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2242) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2243)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2244) if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2245) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2246) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2247) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2248) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2249) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2250)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2251) static int __gup_device_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2252) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2253) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2254) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2255) unsigned long fault_pfn;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2256) int nr_start = *nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2257)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2258) fault_pfn = pud_pfn(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2259) if (!__gup_device_huge(fault_pfn, addr, end, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2260) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2261)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2262) if (unlikely(pud_val(orig) != pud_val(*pudp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2263) undo_dev_pagemap(nr, nr_start, flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2264) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2265) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2266) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2267) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2268) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2269) static int __gup_device_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2270) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2271) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2272) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2273) BUILD_BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2274) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2275) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2276)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2277) static int __gup_device_huge_pud(pud_t pud, pud_t *pudp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2278) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2279) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2280) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2281) BUILD_BUG();
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2282) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2283) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2284) #endif
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2285)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2286) static int record_subpages(struct page *page, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2287) unsigned long end, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2288) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2289) int nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2290)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2291) for (nr = 0; addr != end; addr += PAGE_SIZE)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2292) pages[nr++] = page++;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2293)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2294) return nr;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2295) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2296)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2297) #ifdef CONFIG_ARCH_HAS_HUGEPD
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2298) static unsigned long hugepte_addr_end(unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2299) unsigned long sz)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2300) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2301) unsigned long __boundary = (addr + sz) & ~(sz-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2302) return (__boundary - 1 < end - 1) ? __boundary : end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2303) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2304)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2305) static int gup_hugepte(pte_t *ptep, unsigned long sz, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2306) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2307) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2308) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2309) unsigned long pte_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2310) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2311) pte_t pte;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2312) int refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2313)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2314) pte_end = (addr + sz) & ~(sz-1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2315) if (pte_end < end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2316) end = pte_end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2317)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2318) pte = huge_ptep_get(ptep);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2319)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2320) if (!pte_access_permitted(pte, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2321) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2322)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2323) /* hugepages are never "special" */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2324) VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2325)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2326) head = pte_page(pte);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2327) page = head + ((addr & (sz-1)) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2328) refs = record_subpages(page, addr, end, pages + *nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2329)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2330) head = try_grab_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2331) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2332) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2333)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2334) if (unlikely(pte_val(pte) != pte_val(*ptep))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2335) put_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2336) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2337) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2338)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2339) *nr += refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2340) SetPageReferenced(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2341) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2342) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2343)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2344) static int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2345) unsigned int pdshift, unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2346) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2347) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2348) pte_t *ptep;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2349) unsigned long sz = 1UL << hugepd_shift(hugepd);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2350) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2351)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2352) ptep = hugepte_offset(hugepd, addr, pdshift);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2353) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2354) next = hugepte_addr_end(addr, end, sz);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2355) if (!gup_hugepte(ptep, sz, addr, end, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2356) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2357) } while (ptep++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2358)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2359) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2360) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2361) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2362) static inline int gup_huge_pd(hugepd_t hugepd, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2363) unsigned int pdshift, unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2364) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2365) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2366) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2367) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2368) #endif /* CONFIG_ARCH_HAS_HUGEPD */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2369)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2370) static int gup_huge_pmd(pmd_t orig, pmd_t *pmdp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2371) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2372) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2373) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2374) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2375) int refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2376)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2377) if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2378) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2379)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2380) if (pmd_devmap(orig)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2381) if (unlikely(flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2382) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2383) return __gup_device_huge_pmd(orig, pmdp, addr, end, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2384) pages, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2385) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2386)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2387) page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2388) refs = record_subpages(page, addr, end, pages + *nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2389)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2390) head = try_grab_compound_head(pmd_page(orig), refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2391) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2392) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2393)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2394) if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2395) put_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2396) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2397) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2398)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2399) *nr += refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2400) SetPageReferenced(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2401) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2402) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2403)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2404) static int gup_huge_pud(pud_t orig, pud_t *pudp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2405) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2406) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2407) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2408) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2409) int refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2410)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2411) if (!pud_access_permitted(orig, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2412) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2413)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2414) if (pud_devmap(orig)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2415) if (unlikely(flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2416) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2417) return __gup_device_huge_pud(orig, pudp, addr, end, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2418) pages, nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2419) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2420)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2421) page = pud_page(orig) + ((addr & ~PUD_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2422) refs = record_subpages(page, addr, end, pages + *nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2423)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2424) head = try_grab_compound_head(pud_page(orig), refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2425) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2426) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2427)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2428) if (unlikely(pud_val(orig) != pud_val(*pudp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2429) put_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2430) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2431) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2432)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2433) *nr += refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2434) SetPageReferenced(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2435) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2436) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2437)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2438) static int gup_huge_pgd(pgd_t orig, pgd_t *pgdp, unsigned long addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2439) unsigned long end, unsigned int flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2440) struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2441) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2442) int refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2443) struct page *head, *page;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2444)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2445) if (!pgd_access_permitted(orig, flags & FOLL_WRITE))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2446) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2447)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2448) BUILD_BUG_ON(pgd_devmap(orig));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2449)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2450) page = pgd_page(orig) + ((addr & ~PGDIR_MASK) >> PAGE_SHIFT);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2451) refs = record_subpages(page, addr, end, pages + *nr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2452)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2453) head = try_grab_compound_head(pgd_page(orig), refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2454) if (!head)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2455) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2456)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2457) if (unlikely(pgd_val(orig) != pgd_val(*pgdp))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2458) put_compound_head(head, refs, flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2459) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2460) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2461)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2462) *nr += refs;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2463) SetPageReferenced(head);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2464) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2465) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2466)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2467) static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2468) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2469) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2470) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2471) pmd_t *pmdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2472)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2473) pmdp = pmd_offset_lockless(pudp, pud, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2474) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2475) pmd_t pmd = READ_ONCE(*pmdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2476)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2477) next = pmd_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2478) if (!pmd_present(pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2479) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2480)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2481) if (unlikely(pmd_trans_huge(pmd) || pmd_huge(pmd) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2482) pmd_devmap(pmd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2483) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2484) * NUMA hinting faults need to be handled in the GUP
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2485) * slowpath for accounting purposes and so that they
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2486) * can be serialised against THP migration.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2487) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2488) if (pmd_protnone(pmd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2489) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2490)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2491) if (!gup_huge_pmd(pmd, pmdp, addr, next, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2492) pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2493) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2494)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2495) } else if (unlikely(is_hugepd(__hugepd(pmd_val(pmd))))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2496) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2497) * Architectures can use a different page table format for
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2498) * hugetlbfs pmds than for THP pmds.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2499) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2500) if (!gup_huge_pd(__hugepd(pmd_val(pmd)), addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2501) PMD_SHIFT, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2502) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2503) } else if (!gup_pte_range(pmd, addr, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2504) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2505) } while (pmdp++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2506)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2507) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2508) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2509)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2510) static int gup_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2511) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2512) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2513) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2514) pud_t *pudp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2515)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2516) pudp = pud_offset_lockless(p4dp, p4d, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2517) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2518) pud_t pud = READ_ONCE(*pudp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2519)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2520) next = pud_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2521) if (unlikely(!pud_present(pud)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2522) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2523) if (unlikely(pud_huge(pud))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2524) if (!gup_huge_pud(pud, pudp, addr, next, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2525) pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2526) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2527) } else if (unlikely(is_hugepd(__hugepd(pud_val(pud))))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2528) if (!gup_huge_pd(__hugepd(pud_val(pud)), addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2529) PUD_SHIFT, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2530) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2531) } else if (!gup_pmd_range(pudp, pud, addr, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2532) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2533) } while (pudp++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2534)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2535) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2536) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2537)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2538) static int gup_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2539) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2540) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2541) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2542) p4d_t *p4dp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2543)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2544) p4dp = p4d_offset_lockless(pgdp, pgd, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2545) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2546) p4d_t p4d = READ_ONCE(*p4dp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2547)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2548) next = p4d_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2549) if (p4d_none(p4d))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2550) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2551) BUILD_BUG_ON(p4d_huge(p4d));
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2552) if (unlikely(is_hugepd(__hugepd(p4d_val(p4d))))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2553) if (!gup_huge_pd(__hugepd(p4d_val(p4d)), addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2554) P4D_SHIFT, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2555) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2556) } else if (!gup_pud_range(p4dp, p4d, addr, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2557) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2558) } while (p4dp++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2559)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2560) return 1;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2561) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2562)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2563) static void gup_pgd_range(unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2564) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2565) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2566) unsigned long next;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2567) pgd_t *pgdp;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2568)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2569) pgdp = pgd_offset(current->mm, addr);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2570) do {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2571) pgd_t pgd = READ_ONCE(*pgdp);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2572)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2573) next = pgd_addr_end(addr, end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2574) if (pgd_none(pgd))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2575) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2576) if (unlikely(pgd_huge(pgd))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2577) if (!gup_huge_pgd(pgd, pgdp, addr, next, flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2578) pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2579) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2580) } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2581) if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2582) PGDIR_SHIFT, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2583) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2584) } else if (!gup_p4d_range(pgdp, pgd, addr, next, flags, pages, nr))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2585) return;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2586) } while (pgdp++, addr = next, addr != end);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2587) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2588) #else
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2589) static inline void gup_pgd_range(unsigned long addr, unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2590) unsigned int flags, struct page **pages, int *nr)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2591) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2592) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2593) #endif /* CONFIG_HAVE_FAST_GUP */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2594)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2595) #ifndef gup_fast_permitted
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2596) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2597) * Check if it's allowed to use get_user_pages_fast_only() for the range, or
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2598) * we need to fall back to the slow version:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2599) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2600) static bool gup_fast_permitted(unsigned long start, unsigned long end)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2601) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2602) return true;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2603) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2604) #endif
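/*
 * A rough sketch of the kind of override an architecture can supply instead
 * of the generic stub above. The exact check is arch-specific; bounding the
 * range to TASK_SIZE_MAX here is only an assumption, modelled on
 * architectures that restrict fast GUP to user addresses. Defining the macro
 * is what makes the #ifndef above skip the generic version:
 *
 *	static inline bool gup_fast_permitted(unsigned long start,
 *					      unsigned long end)
 *	{
 *		return end <= TASK_SIZE_MAX;	// assumed arch-specific bound
 *	}
 *	#define gup_fast_permitted gup_fast_permitted
 */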
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2605)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2606) static int __gup_longterm_unlocked(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2607) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2608) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2609) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2610)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2611) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2612) * FIXME: FOLL_LONGTERM does not work with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2613) * get_user_pages_unlocked() (see comments in that function)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2614) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2615) if (gup_flags & FOLL_LONGTERM) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2616) mmap_read_lock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2617) ret = __gup_longterm_locked(current->mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2618) start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2619) pages, NULL, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2620) mmap_read_unlock(current->mm);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2621) } else {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2622) ret = get_user_pages_unlocked(start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2623) pages, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2624) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2625)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2626) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2627) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2628)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2629) static unsigned long lockless_pages_from_mm(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2630) unsigned long end,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2631) unsigned int gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2632) struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2633) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2634) unsigned long flags;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2635) int nr_pinned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2636) unsigned seq;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2637)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2638) if (!IS_ENABLED(CONFIG_HAVE_FAST_GUP) ||
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2639) !gup_fast_permitted(start, end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2640) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2641)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2642) if (gup_flags & FOLL_PIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2643) seq = raw_read_seqcount(&current->mm->write_protect_seq);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2644) if (seq & 1)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2645) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2646) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2647)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2648) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2649) * Disable interrupts. The nested form is used in order to allow full,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2650) * general-purpose use of this routine.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2651) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2652) * With interrupts disabled, we block page table pages from being freed
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2653) * from under us. See struct mmu_table_batch comments in
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2654) * include/asm-generic/tlb.h for more details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2655) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2656) * We do not adopt an rcu_read_lock() here as we also want to block IPIs
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2657) * that come from THPs splitting.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2658) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2659) local_irq_save(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2660) gup_pgd_range(start, end, gup_flags, pages, &nr_pinned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2661) local_irq_restore(flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2662)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2663) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2664) * When pinning pages for DMA there could be a concurrent write protect
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2665) * from fork() via copy_page_range(); in that case, always fail fast GUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2666) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2667) if (gup_flags & FOLL_PIN) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2668) if (read_seqcount_retry(&current->mm->write_protect_seq, seq)) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2669) unpin_user_pages(pages, nr_pinned);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2670) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2671) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2672) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2673) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2674) }
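/*
 * A hypothetical timeline of the race that the seqcount check above closes
 * (the CPU numbering and exact interleaving are illustrative only):
 *
 *	CPU0: fork()/copy_page_range()      CPU1: fast GUP with FOLL_PIN
 *	------------------------------      ----------------------------
 *	                                    read write_protect_seq (even)
 *	begin write_protect_seq write
 *	write-protect parent PTEs for COW
 *	                                    lockless walk pins the old page
 *	end write_protect_seq write
 *	                                    read_seqcount_retry() sees the
 *	                                    change: unpin everything, return 0
 *
 * Returning 0 makes the caller fall back to the slow path (unless
 * FOLL_FAST_ONLY was requested).
 */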
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2675)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2676) static int internal_get_user_pages_fast(unsigned long start,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2677) unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2678) unsigned int gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2679) struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2680) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2681) unsigned long len, end;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2682) unsigned long nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2683) int ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2684)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2685) if (WARN_ON_ONCE(gup_flags & ~(FOLL_WRITE | FOLL_LONGTERM |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2686) FOLL_FORCE | FOLL_PIN | FOLL_GET |
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2687) FOLL_FAST_ONLY)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2688) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2689)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2690) if (gup_flags & FOLL_PIN)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2691) atomic_set(&current->mm->has_pinned, 1);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2692)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2693) if (!(gup_flags & FOLL_FAST_ONLY))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2694) might_lock_read(&current->mm->mmap_lock);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2695)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2696) start = untagged_addr(start) & PAGE_MASK;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2697) len = nr_pages << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2698) if (check_add_overflow(start, len, &end))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2699) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2700) if (unlikely(!access_ok((void __user *)start, len)))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2701) return -EFAULT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2702)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2703) nr_pinned = lockless_pages_from_mm(start, end, gup_flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2704) if (nr_pinned == nr_pages || gup_flags & FOLL_FAST_ONLY)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2705) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2706)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2707) /* Slow path: try to get the remaining pages with get_user_pages */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2708) start += nr_pinned << PAGE_SHIFT;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2709) pages += nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2710) ret = __gup_longterm_unlocked(start, nr_pages - nr_pinned, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2711) pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2712) if (ret < 0) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2713) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2714) * The caller has to unpin the pages we already pinned, so returning
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2715) * a bare -errno (discarding nr_pinned) is not an option.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2716) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2717) if (nr_pinned)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2718) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2719) return ret;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2720) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2721) return ret + nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2722) }
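/*
 * A worked example of how the fast-path and slow-path results above combine
 * (the numbers are arbitrary). With nr_pages == 8:
 *
 *	fast path pins 8                    -> return 8, slow path skipped
 *	fast path pins 5, slow path gets 3  -> return 5 + 3 = 8
 *	fast path pins 5, slow path -EFAULT -> return 5, so the caller still
 *	                                       learns about (and can release)
 *	                                       the pages already pinned
 *	fast path pins 0, slow path -EFAULT -> return -EFAULT
 *	FOLL_FAST_ONLY, fast path pins 5    -> return 5, slow path never run
 */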
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2723)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2724) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2725) * get_user_pages_fast_only() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2726) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2727) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2728) * @gup_flags: flags modifying pin behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2729) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2730) * Should be at least nr_pages long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2731) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2732) * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2733) * the regular GUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2734) * Note a difference from get_user_pages_fast(): this one always returns the
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2735) * number of pages pinned, or 0 if no pages were pinned; never a negative errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2736) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2737) * If the architecture does not support this function, simply return with no
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2738) * pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2739) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2740) * Careful, careful! COW breaking can go either way, so a non-write
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2741) * access can get ambiguous page results. If you call this function without
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2742) * 'write' set, you'd better be sure that you're ok with that ambiguity.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2743) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2744) int get_user_pages_fast_only(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2745) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2746) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2747) int nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2748) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2749) * Internally (within mm/gup.c), gup fast variants must set FOLL_GET,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2750) * because gup fast is always a "pin with a +1 page refcount" request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2751) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2752) * FOLL_FAST_ONLY is required in order to match the API description of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2753) * this routine: no fall back to regular ("slow") GUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2754) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2755) gup_flags |= FOLL_GET | FOLL_FAST_ONLY;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2756)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2757) nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2758) pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2759)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2760) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2761) * As specified in the API description above, this routine is not
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2762) * allowed to return negative values. However, the common core
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2763) * routine internal_get_user_pages_fast() *can* return -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2764) * Therefore, correct for that here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2765) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2766) if (nr_pinned < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2767) nr_pinned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2768)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2769) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2770) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2771) EXPORT_SYMBOL_GPL(get_user_pages_fast_only);
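/*
 * A minimal usage sketch; the single-page request, the FOLL_WRITE flag and
 * the fallback strategy are assumptions. This variant suits callers that
 * must not sleep and can retry from a blocking context:
 *
 *	struct page *page;
 *
 *	if (get_user_pages_fast_only(addr, 1, FOLL_WRITE, &page) == 1) {
 *		// Got the page without sleeping or taking mmap_lock.
 *		...
 *		put_page(page);		// drop the FOLL_GET reference
 *	} else {
 *		// Nothing pinned; retry from a context that may sleep.
 *	}
 */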
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2772)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2773) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2774) * get_user_pages_fast() - pin user pages in memory
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2775) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2776) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2777) * @gup_flags: flags modifying pin behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2778) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2779) * Should be at least nr_pages long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2780) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2781) * Attempt to pin user pages in memory without taking mm->mmap_lock.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2782) * If not successful, it will fall back to taking the lock and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2783) * calling get_user_pages().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2784) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2785) * Returns number of pages pinned. This may be fewer than the number requested.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2786) * If nr_pages is 0 or negative, returns 0. If no pages were pinned, returns
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2787) * -errno.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2788) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2789) int get_user_pages_fast(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2790) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2791) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2792) if (!is_valid_gup_flags(gup_flags))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2793) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2794)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2795) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2796) * The caller may or may not have explicitly set FOLL_GET; either way is
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2797) * OK. However, internally (within mm/gup.c), gup fast variants must set
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2798) * FOLL_GET, because gup fast is always a "pin with a +1 page refcount"
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2799) * request.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2800) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2801) gup_flags |= FOLL_GET;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2802) return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2803) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2804) EXPORT_SYMBOL_GPL(get_user_pages_fast);
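/*
 * An illustrative sketch only; the four-page request, the flag and the error
 * handling are assumptions. Pages returned here carry normal FOLL_GET
 * references and are dropped with put_page():
 *
 *	struct page *pages[4];
 *	int i, nr;
 *
 *	nr = get_user_pages_fast(uaddr, 4, FOLL_WRITE, pages);
 *	if (nr < 0)
 *		return nr;		// nothing was pinned
 *	// ... access the nr pages actually pinned (may be fewer than 4) ...
 *	for (i = 0; i < nr; i++)
 *		put_page(pages[i]);
 */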
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2805)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2806) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2807) * pin_user_pages_fast() - pin user pages in memory without taking locks
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2808) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2809) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2810) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2811) * @gup_flags: flags modifying pin behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2812) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2813) * Should be at least nr_pages long.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2814) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2815) * Nearly the same as get_user_pages_fast(), except that FOLL_PIN is set. See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2816) * get_user_pages_fast() for documentation on the function arguments, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2817) * the arguments here are identical.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2818) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2819) * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2820) * see Documentation/core-api/pin_user_pages.rst for further details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2821) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2822) int pin_user_pages_fast(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2823) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2824) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2825) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2826) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2827) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2828)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2829) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2830) return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2831) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2832) EXPORT_SYMBOL_GPL(pin_user_pages_fast);
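/*
 * A minimal sketch; the page count and the DMA use case are assumptions, and
 * FOLL_LONGTERM is shown because device DMA pins are typically long-lived.
 * FOLL_PIN pages must be released with unpin_user_page() or
 * unpin_user_pages(), never with put_page():
 *
 *	struct page *pages[16];
 *	int nr;
 *
 *	nr = pin_user_pages_fast(uaddr, 16, FOLL_WRITE | FOLL_LONGTERM, pages);
 *	if (nr < 0)
 *		return nr;		// nothing was pinned
 *	// ... program the device with the nr pinned pages ...
 *	unpin_user_pages(pages, nr);
 */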
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2833)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2834) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2835) * This is the FOLL_PIN equivalent of get_user_pages_fast_only(). Behavior
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2836) * is the same, except that this one sets FOLL_PIN instead of FOLL_GET.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2837) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2838) * The API rules are the same, too: no negative values may be returned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2839) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2840) int pin_user_pages_fast_only(unsigned long start, int nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2841) unsigned int gup_flags, struct page **pages)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2842) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2843) int nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2844)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2845) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2846) * FOLL_GET and FOLL_PIN are mutually exclusive. Note that the API
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2847) * rules require returning 0, rather than -errno:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2848) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2849) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2850) return 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2851) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2852) * FOLL_FAST_ONLY is required in order to match the API description of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2853) * this routine: no fall back to regular ("slow") GUP.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2854) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2855) gup_flags |= (FOLL_PIN | FOLL_FAST_ONLY);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2856) nr_pinned = internal_get_user_pages_fast(start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2857) pages);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2858) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2859) * This routine is not allowed to return negative values. However,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2860) * internal_get_user_pages_fast() *can* return -errno. Therefore,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2861) * correct for that here:
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2862) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2863) if (nr_pinned < 0)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2864) nr_pinned = 0;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2865)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2866) return nr_pinned;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2867) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2868) EXPORT_SYMBOL_GPL(pin_user_pages_fast_only);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2869)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2870) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2871) * pin_user_pages_remote() - pin pages of a remote process
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2872) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2873) * @mm: mm_struct of target mm
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2874) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2875) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2876) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2877) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2878) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2879) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2880) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2881) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2882) * @locked: pointer to lock flag indicating whether lock is held and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2883) * subsequently whether VM_FAULT_RETRY functionality can be
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2884) * utilised. Lock must initially be held.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2885) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2886) * Nearly the same as get_user_pages_remote(), except that FOLL_PIN is set. See
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2887) * get_user_pages_remote() for documentation on the function arguments, because
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2888) * the arguments here are identical.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2889) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2890) * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2891) * see Documentation/core-api/pin_user_pages.rst for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2892) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2893) long pin_user_pages_remote(struct mm_struct *mm,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2894) unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2895) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2896) struct vm_area_struct **vmas, int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2897) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2898) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2899) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2900) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2901)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2902) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2903) return __get_user_pages_remote(mm, start, nr_pages, gup_flags,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2904) pages, vmas, locked);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2905) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2906) EXPORT_SYMBOL(pin_user_pages_remote);
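/*
 * An illustrative sketch of the remote calling convention; the target mm,
 * the single-page request and the flags are assumptions. The caller holds
 * the target mm's mmap_lock and passes *locked == 1; if the lock had to be
 * dropped internally, *locked is 0 on return:
 *
 *	struct page *page;
 *	int locked = 1;
 *	long ret;
 *
 *	mmap_read_lock(mm);
 *	ret = pin_user_pages_remote(mm, addr, 1, FOLL_WRITE, &page, NULL,
 *				    &locked);
 *	if (locked)
 *		mmap_read_unlock(mm);
 *	if (ret == 1) {
 *		// ... access the page ...
 *		unpin_user_page(page);
 *	}
 */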
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2907)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2908) /**
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2909) * pin_user_pages() - pin user pages in memory for use by other devices
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2910) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2911) * @start: starting user address
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2912) * @nr_pages: number of pages from start to pin
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2913) * @gup_flags: flags modifying lookup behaviour
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2914) * @pages: array that receives pointers to the pages pinned.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2915) * Should be at least nr_pages long. Or NULL, if caller
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2916) * only intends to ensure the pages are faulted in.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2917) * @vmas: array of pointers to vmas corresponding to each page.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2918) * Or NULL if the caller does not require them.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2919) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2920) * Nearly the same as get_user_pages(), except that FOLL_TOUCH is not set, and
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2921) * FOLL_PIN is set.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2922) *
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2923) * FOLL_PIN means that the pages must be released via unpin_user_page(). Please
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2924) * see Documentation/core-api/pin_user_pages.rst for details.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2925) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2926) long pin_user_pages(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2927) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2928) struct vm_area_struct **vmas)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2929) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2930) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2931) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2932) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2933)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2934) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2935) return __gup_longterm_locked(current->mm, start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2936) pages, vmas, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2937) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2938) EXPORT_SYMBOL(pin_user_pages);
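/*
 * A minimal sketch; the flags, page count and error handling are
 * assumptions. As with get_user_pages(), the caller is expected to hold
 * current->mm's mmap_lock across this call:
 *
 *	struct page *pages[8];
 *	long nr;
 *
 *	mmap_read_lock(current->mm);
 *	nr = pin_user_pages(uaddr, 8, FOLL_WRITE | FOLL_LONGTERM, pages, NULL);
 *	mmap_read_unlock(current->mm);
 *	if (nr < 0)
 *		return nr;
 *	// ... hand the nr pinned pages to the device ...
 *	unpin_user_pages(pages, nr);
 */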
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2939)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2940) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2941) * pin_user_pages_unlocked() is the FOLL_PIN variant of
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2942) * get_user_pages_unlocked(). Behavior is the same, except that this one sets
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2943) * FOLL_PIN and rejects FOLL_GET.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2944) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2945) long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2946) struct page **pages, unsigned int gup_flags)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2947) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2948) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2949) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2950) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2951)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2952) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2953) return get_user_pages_unlocked(start, nr_pages, pages, gup_flags);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2954) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2955) EXPORT_SYMBOL(pin_user_pages_unlocked);
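/*
 * Illustrative only; the two-page request and FOLL_WRITE are assumptions.
 * Unlike pin_user_pages(), the caller does not hold mmap_lock here; note
 * also that @pages precedes @gup_flags in this variant:
 *
 *	struct page *pages[2];
 *	long nr;
 *
 *	nr = pin_user_pages_unlocked(uaddr, 2, pages, FOLL_WRITE);
 *	if (nr > 0)
 *		unpin_user_pages(pages, nr);
 */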
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2956)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2957) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2958) * pin_user_pages_locked() is the FOLL_PIN variant of get_user_pages_locked().
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2959) * Behavior is the same, except that this one sets FOLL_PIN and rejects
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2960) * FOLL_GET.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2961) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2962) long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2963) unsigned int gup_flags, struct page **pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2964) int *locked)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2965) {
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2966) /*
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2967) * FIXME: Current FOLL_LONGTERM behavior is incompatible with
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2968) * FAULT_FLAG_ALLOW_RETRY because of the FS DAX check requirement on
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2969) * vmas. As there are no users of this flag in this call, we simply
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2970) * disallow this option for now.
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2971) */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2972) if (WARN_ON_ONCE(gup_flags & FOLL_LONGTERM))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2973) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2974)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2975) /* FOLL_GET and FOLL_PIN are mutually exclusive. */
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2976) if (WARN_ON_ONCE(gup_flags & FOLL_GET))
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2977) return -EINVAL;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2978)
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2979) gup_flags |= FOLL_PIN;
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2980) return __get_user_pages_locked(current->mm, start, nr_pages,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2981) pages, NULL, locked,
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2982) gup_flags | FOLL_TOUCH);
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2983) }
^8f3ce5b39 (kx 2023-10-28 12:00:06 +0300 2984) EXPORT_SYMBOL(pin_user_pages_locked);